From a3a25996b11401f7589d1429225dc048d8720da9 Mon Sep 17 00:00:00 2001 From: Bertik23 <39457484+Bertik23@users.noreply.github.com> Date: Thu, 11 Sep 2025 19:17:52 +0200 Subject: [PATCH 001/734] [LLVM][MLIR] Move LSP server support library from MLIR into LLVM (#157885) This is a second PR on this patch (first: #155572); it fixes the linking problem for the `flang-aarch64-dylib` test. The SupportLSP library was made a component library. --- This PR moves the generic Language Server Protocol (LSP) server support code that was copied from clangd into MLIR, into the LLVM tree so it can be reused by multiple subprojects. Centralizing the generic LSP support in LLVM lowers the barrier to building new LSP servers across the LLVM ecosystem and avoids each subproject maintaining its own copy. The code originated in clangd and was copied into MLIR for its LSP server. MLIR kept this code separate so it could be reused by all of its LSP servers. This PR relocates the MLIR copy into LLVM as a shared component under LLVM/Support. If this is not a suitable place, please suggest a better one. A follow-up to this move could deduplicate with the original clangd implementation and converge on a single shared LSP support library used by clangd, MLIR, and future servers. What changes: mlir/include/mlir/Tools/lsp-server-support/{Logging, Protocol, Transport}.h moved to llvm/include/llvm/Support/LSP; mlir/lib/Tools/lsp-server-support/{Logging, Protocol, Transport}.cpp moved to llvm/lib/Support/LSP; and their namespace was changed from mlir to llvm. I ran clang-tidy --fix and clang-format on all of the moved files (last two commits), as they are basically new files and should hold up to the code style used by LLVM. MLIR LSP servers were updated to include these files from their new location and account for the namespace change. 
This PR is made as part of the LLVM IR LSP project ([RFC](https://discourse.llvm.org/t/rfc-ir-visualization-with-vs-code-extension-using-an-lsp-server/87773)) --- .../include/llvm/Support/LSP}/Logging.h | 38 +- .../include/llvm/Support/LSP}/Protocol.h | 23 +- llvm/include/llvm/Support/LSP/Transport.h | 289 +++++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/LSP/CMakeLists.txt | 8 + .../lib/Support/LSP}/Logging.cpp | 28 +- llvm/lib/Support/LSP/Protocol.cpp | 1043 +++++++++++++++++ llvm/lib/Support/LSP/Transport.cpp | 369 ++++++ llvm/unittests/Support/CMakeLists.txt | 2 + llvm/unittests/Support/LSP/CMakeLists.txt | 8 + .../unittests/Support/LSP}/Protocol.cpp | 6 +- .../unittests/Support/LSP}/Transport.cpp | 12 +- .../Tools/lsp-server-support/SourceMgrUtils.h | 12 +- .../mlir/Tools/lsp-server-support/Transport.h | 283 ----- .../mlir-lsp-server/MlirLspRegistryFunction.h | 6 +- .../Tools/lsp-server-support/CMakeLists.txt | 8 +- .../CompilationDatabase.cpp | 5 +- .../lib/Tools/lsp-server-support/Protocol.cpp | 1043 ----------------- .../lsp-server-support/SourceMgrUtils.cpp | 4 + .../Tools/lsp-server-support/Transport.cpp | 369 ------ mlir/lib/Tools/mlir-lsp-server/CMakeLists.txt | 3 + mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp | 31 +- mlir/lib/Tools/mlir-lsp-server/LSPServer.h | 6 +- mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp | 133 ++- mlir/lib/Tools/mlir-lsp-server/MLIRServer.h | 22 +- .../mlir-lsp-server/MlirLspServerMain.cpp | 8 +- mlir/lib/Tools/mlir-lsp-server/Protocol.cpp | 7 +- mlir/lib/Tools/mlir-lsp-server/Protocol.h | 6 +- .../Tools/mlir-pdll-lsp-server/CMakeLists.txt | 3 + .../Tools/mlir-pdll-lsp-server/LSPServer.cpp | 29 +- .../Tools/mlir-pdll-lsp-server/LSPServer.h | 6 +- .../MlirPdllLspServerMain.cpp | 10 +- .../Tools/mlir-pdll-lsp-server/PDLLServer.cpp | 561 ++++----- .../Tools/mlir-pdll-lsp-server/PDLLServer.h | 26 +- .../Tools/mlir-pdll-lsp-server/Protocol.cpp | 1 + .../lib/Tools/mlir-pdll-lsp-server/Protocol.h | 4 +- 
.../Tools/tblgen-lsp-server/CMakeLists.txt | 1 + .../lib/Tools/tblgen-lsp-server/LSPServer.cpp | 25 +- mlir/lib/Tools/tblgen-lsp-server/LSPServer.h | 6 +- .../TableGenLspServerMain.cpp | 8 +- .../tblgen-lsp-server/TableGenServer.cpp | 162 +-- .../Tools/tblgen-lsp-server/TableGenServer.h | 15 +- .../tools/mlir-lsp-server/mlir-lsp-server.cpp | 6 +- mlir/unittests/CMakeLists.txt | 1 - mlir/unittests/Tools/CMakeLists.txt | 1 - .../Tools/lsp-server-support/CMakeLists.txt | 7 - 46 files changed, 2413 insertions(+), 2232 deletions(-) rename {mlir/include/mlir/Tools/lsp-server-support => llvm/include/llvm/Support/LSP}/Logging.h (55%) rename {mlir/include/mlir/Tools/lsp-server-support => llvm/include/llvm/Support/LSP}/Protocol.h (98%) create mode 100644 llvm/include/llvm/Support/LSP/Transport.h create mode 100644 llvm/lib/Support/LSP/CMakeLists.txt rename {mlir/lib/Tools/lsp-server-support => llvm/lib/Support/LSP}/Logging.cpp (55%) create mode 100644 llvm/lib/Support/LSP/Protocol.cpp create mode 100644 llvm/lib/Support/LSP/Transport.cpp create mode 100644 llvm/unittests/Support/LSP/CMakeLists.txt rename {mlir/unittests/Tools/lsp-server-support => llvm/unittests/Support/LSP}/Protocol.cpp (93%) rename {mlir/unittests/Tools/lsp-server-support => llvm/unittests/Support/LSP}/Transport.cpp (96%) delete mode 100644 mlir/include/mlir/Tools/lsp-server-support/Transport.h delete mode 100644 mlir/lib/Tools/lsp-server-support/Protocol.cpp delete mode 100644 mlir/lib/Tools/lsp-server-support/Transport.cpp delete mode 100644 mlir/unittests/Tools/CMakeLists.txt delete mode 100644 mlir/unittests/Tools/lsp-server-support/CMakeLists.txt diff --git a/mlir/include/mlir/Tools/lsp-server-support/Logging.h b/llvm/include/llvm/Support/LSP/Logging.h similarity index 55% rename from mlir/include/mlir/Tools/lsp-server-support/Logging.h rename to llvm/include/llvm/Support/LSP/Logging.h index 9b090d05f7fa4..fe65899b1d4ce 100644 --- a/mlir/include/mlir/Tools/lsp-server-support/Logging.h +++ 
b/llvm/include/llvm/Support/LSP/Logging.h @@ -1,4 +1,4 @@ -//===- Logging.h - MLIR LSP Server Logging ----------------------*- C++ -*-===// +//===- Logging.h - LSP Server Logging ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,16 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_TOOLS_LSPSERVERSUPPORT_LOGGING_H -#define MLIR_TOOLS_LSPSERVERSUPPORT_LOGGING_H +#ifndef LLVM_SUPPORT_LSP_LOGGING_H +#define LLVM_SUPPORT_LSP_LOGGING_H -#include "mlir/Support/LLVM.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include #include -namespace mlir { +namespace llvm { namespace lsp { /// This class represents the main interface for logging, and allows for @@ -26,21 +25,18 @@ class Logger { enum class Level { Debug, Info, Error }; /// Set the severity level of the logger. - static void setLogLevel(Level logLevel); + static void setLogLevel(Level LogLevel); /// Initiate a log message at various severity levels. These should be called /// after a call to `initialize`. 
- template - static void debug(const char *fmt, Ts &&...vals) { - log(Level::Debug, fmt, llvm::formatv(fmt, std::forward(vals)...)); + template static void debug(const char *Fmt, Ts &&...Vals) { + log(Level::Debug, Fmt, llvm::formatv(Fmt, std::forward(Vals)...)); } - template - static void info(const char *fmt, Ts &&...vals) { - log(Level::Info, fmt, llvm::formatv(fmt, std::forward(vals)...)); + template static void info(const char *Fmt, Ts &&...Vals) { + log(Level::Info, Fmt, llvm::formatv(Fmt, std::forward(Vals)...)); } - template - static void error(const char *fmt, Ts &&...vals) { - log(Level::Error, fmt, llvm::formatv(fmt, std::forward(vals)...)); + template static void error(const char *Fmt, Ts &&...Vals) { + log(Level::Error, Fmt, llvm::formatv(Fmt, std::forward(Vals)...)); } private: @@ -50,16 +46,16 @@ class Logger { static Logger &get(); /// Start a log message with the given severity level. - static void log(Level logLevel, const char *fmt, - const llvm::formatv_object_base &message); + static void log(Level LogLevel, const char *Fmt, + const llvm::formatv_object_base &Message); /// The minimum logging level. Messages with lower level are ignored. - Level logLevel = Level::Error; + Level LogLevel = Level::Error; /// A mutex used to guard logging. 
- std::mutex mutex; + std::mutex Mutex; }; } // namespace lsp -} // namespace mlir +} // namespace llvm -#endif // MLIR_TOOLS_LSPSERVERSUPPORT_LOGGING_H +#endif // LLVM_SUPPORT_LSP_LOGGING_H diff --git a/mlir/include/mlir/Tools/lsp-server-support/Protocol.h b/llvm/include/llvm/Support/LSP/Protocol.h similarity index 98% rename from mlir/include/mlir/Tools/lsp-server-support/Protocol.h rename to llvm/include/llvm/Support/LSP/Protocol.h index cc06dbfedb42a..93b82f1e581f8 100644 --- a/mlir/include/mlir/Tools/lsp-server-support/Protocol.h +++ b/llvm/include/llvm/Support/LSP/Protocol.h @@ -20,20 +20,24 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_TOOLS_LSPSERVERSUPPORT_PROTOCOL_H -#define MLIR_TOOLS_LSPSERVERSUPPORT_PROTOCOL_H +#ifndef LLVM_SUPPORT_LSP_PROTOCOL_H +#define LLVM_SUPPORT_LSP_PROTOCOL_H -#include "mlir/Support/LLVM.h" #include "llvm/Support/JSON.h" +#include "llvm/Support/LogicalResult.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include -#include -namespace mlir { +// This file is using the LSP syntax for identifier names which is different +// from the LLVM coding standard. To avoid the clang-tidy warnings, we're +// disabling one check here. 
+// NOLINTBEGIN(readability-identifier-naming) + +namespace llvm { namespace lsp { enum class ErrorCode { @@ -1241,12 +1245,11 @@ struct CodeAction { llvm::json::Value toJSON(const CodeAction &); } // namespace lsp -} // namespace mlir +} // namespace llvm namespace llvm { -template <> -struct format_provider { - static void format(const mlir::lsp::Position &pos, raw_ostream &os, +template <> struct format_provider { + static void format(const llvm::lsp::Position &pos, raw_ostream &os, StringRef style) { assert(style.empty() && "style modifiers for this type are not supported"); os << pos; @@ -1255,3 +1258,5 @@ struct format_provider { } // namespace llvm #endif + +// NOLINTEND(readability-identifier-naming) diff --git a/llvm/include/llvm/Support/LSP/Transport.h b/llvm/include/llvm/Support/LSP/Transport.h new file mode 100644 index 0000000000000..ccd7f213aa277 --- /dev/null +++ b/llvm/include/llvm/Support/LSP/Transport.h @@ -0,0 +1,289 @@ +//===--- Transport.h - Sending and Receiving LSP messages -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The language server protocol is usually implemented by writing messages as +// JSON-RPC over the stdin/stdout of a subprocess. This file contains a JSON +// transport interface that handles this communication. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_LSP_TRANSPORT_H +#define LLVM_SUPPORT_LSP_TRANSPORT_H + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Protocol.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +// Simple helper function that returns a string as printed from a op. +template static std::string debugString(T &&Op) { + std::string InstrStr; + llvm::raw_string_ostream Os(InstrStr); + Os << Op; + return Os.str(); +} +namespace lsp { +class MessageHandler; + +//===----------------------------------------------------------------------===// +// JSONTransport +//===----------------------------------------------------------------------===// + +/// The encoding style of the JSON-RPC messages (both input and output). +enum JSONStreamStyle { + /// Encoding per the LSP specification, with mandatory Content-Length header. + Standard, + /// Messages are delimited by a '// -----' line. Comment lines start with //. + Delimited +}; + +/// An abstract class used by the JSONTransport to read JSON message. +class JSONTransportInput { +public: + explicit JSONTransportInput(JSONStreamStyle Style = JSONStreamStyle::Standard) + : Style(Style) {} + virtual ~JSONTransportInput() = default; + + virtual bool hasError() const = 0; + virtual bool isEndOfInput() const = 0; + + /// Read in a message from the input stream. + LogicalResult readMessage(std::string &Json) { + return Style == JSONStreamStyle::Delimited ? readDelimitedMessage(Json) + : readStandardMessage(Json); + } + virtual LogicalResult readDelimitedMessage(std::string &Json) = 0; + virtual LogicalResult readStandardMessage(std::string &Json) = 0; + +private: + /// The JSON stream style to use. 
+ JSONStreamStyle Style; +}; + +/// Concrete implementation of the JSONTransportInput that reads from a file. +class JSONTransportInputOverFile : public JSONTransportInput { +public: + explicit JSONTransportInputOverFile( + std::FILE *In, JSONStreamStyle Style = JSONStreamStyle::Standard) + : JSONTransportInput(Style), In(In) {} + + bool hasError() const final { return ferror(In); } + bool isEndOfInput() const final { return feof(In); } + + LogicalResult readDelimitedMessage(std::string &Json) final; + LogicalResult readStandardMessage(std::string &Json) final; + +private: + std::FILE *In; +}; + +/// A transport class that performs the JSON-RPC communication with the LSP +/// client. +class JSONTransport { +public: + JSONTransport(std::unique_ptr In, raw_ostream &Out, + bool PrettyOutput = false) + : In(std::move(In)), Out(Out), PrettyOutput(PrettyOutput) {} + + JSONTransport(std::FILE *In, raw_ostream &Out, + JSONStreamStyle Style = JSONStreamStyle::Standard, + bool PrettyOutput = false) + : In(std::make_unique(In, Style)), Out(Out), + PrettyOutput(PrettyOutput) {} + + /// The following methods are used to send a message to the LSP client. + void notify(StringRef Method, llvm::json::Value Params); + void call(StringRef Method, llvm::json::Value Params, llvm::json::Value Id); + void reply(llvm::json::Value Id, llvm::Expected Result); + + /// Start executing the JSON-RPC transport. + llvm::Error run(MessageHandler &Handler); + +private: + /// Dispatches the given incoming json message to the message handler. + bool handleMessage(llvm::json::Value Msg, MessageHandler &Handler); + /// Writes the given message to the output stream. + void sendMessage(llvm::json::Value Msg); + +private: + /// The input to read a message from. + std::unique_ptr In; + SmallVector OutputBuffer; + /// The output file stream. + raw_ostream &Out; + /// If the output JSON should be formatted for easier readability. 
+ bool PrettyOutput; +}; + +//===----------------------------------------------------------------------===// +// MessageHandler +//===----------------------------------------------------------------------===// + +/// A Callback is a void function that accepts Expected. This is +/// accepted by functions that logically return T. +template +using Callback = llvm::unique_function)>; + +/// An OutgoingNotification is a function used for outgoing notifications +/// send to the client. +template +using OutgoingNotification = llvm::unique_function; + +/// An OutgoingRequest is a function used for outgoing requests to send to +/// the client. +template +using OutgoingRequest = + llvm::unique_function; + +/// An `OutgoingRequestCallback` is invoked when an outgoing request to the +/// client receives a response in turn. It is passed the original request's ID, +/// as well as the response result. +template +using OutgoingRequestCallback = + std::function)>; + +/// A handler used to process the incoming transport messages. +class MessageHandler { +public: + MessageHandler(JSONTransport &Transport) : Transport(Transport) {} + + bool onNotify(StringRef Method, llvm::json::Value Value); + bool onCall(StringRef Method, llvm::json::Value Params, llvm::json::Value Id); + bool onReply(llvm::json::Value Id, llvm::Expected Result); + + template + static llvm::Expected parse(const llvm::json::Value &Raw, + StringRef PayloadName, StringRef PayloadKind) { + T Result; + llvm::json::Path::Root Root; + if (fromJSON(Raw, Result, Root)) + return std::move(Result); + + // Dump the relevant parts of the broken message. + std::string Context; + llvm::raw_string_ostream Os(Context); + Root.printErrorContext(Raw, Os); + + // Report the error (e.g. to the client). 
+ return llvm::make_error( + llvm::formatv("failed to decode {0} {1}: {2}", PayloadName, PayloadKind, + fmt_consume(Root.getError())), + ErrorCode::InvalidParams); + } + + template + void method(llvm::StringLiteral Method, ThisT *ThisPtr, + void (ThisT::*Handler)(const Param &, Callback)) { + MethodHandlers[Method] = [Method, Handler, + ThisPtr](llvm::json::Value RawParams, + Callback Reply) { + llvm::Expected Parameter = + parse(RawParams, Method, "request"); + if (!Parameter) + return Reply(Parameter.takeError()); + (ThisPtr->*Handler)(*Parameter, std::move(Reply)); + }; + } + + template + void notification(llvm::StringLiteral Method, ThisT *ThisPtr, + void (ThisT::*Handler)(const Param &)) { + NotificationHandlers[Method] = [Method, Handler, + ThisPtr](llvm::json::Value RawParams) { + llvm::Expected Parameter = + parse(RawParams, Method, "notification"); + if (!Parameter) { + return llvm::consumeError(llvm::handleErrors( + Parameter.takeError(), [](const LSPError &LspError) { + Logger::error("JSON parsing error: {0}", + LspError.message.c_str()); + })); + } + (ThisPtr->*Handler)(*Parameter); + }; + } + + /// Create an OutgoingNotification object used for the given method. + template + OutgoingNotification outgoingNotification(llvm::StringLiteral Method) { + return [&, Method](const T &Params) { + std::lock_guard TransportLock(TransportOutputMutex); + Logger::info("--> {0}", Method); + Transport.notify(Method, llvm::json::Value(Params)); + }; + } + + /// Create an OutgoingRequest function that, when called, sends a request with + /// the given method via the transport. Should the outgoing request be + /// met with a response, the result JSON is parsed and the response callback + /// is invoked. 
+ template + OutgoingRequest + outgoingRequest(llvm::StringLiteral Method, + OutgoingRequestCallback Callback) { + return [&, Method, Callback](const Param &Parameter, llvm::json::Value Id) { + auto CallbackWrapper = [Method, Callback = std::move(Callback)]( + llvm::json::Value Id, + llvm::Expected Value) { + if (!Value) + return Callback(std::move(Id), Value.takeError()); + + std::string ResponseName = llvm::formatv("reply:{0}({1})", Method, Id); + llvm::Expected ParseResult = + parse(*Value, ResponseName, "response"); + if (!ParseResult) + return Callback(std::move(Id), ParseResult.takeError()); + + return Callback(std::move(Id), *ParseResult); + }; + + { + std::lock_guard Lock(ResponseHandlersMutex); + ResponseHandlers.insert( + {debugString(Id), std::make_pair(Method.str(), CallbackWrapper)}); + } + + std::lock_guard TransportLock(TransportOutputMutex); + Logger::info("--> {0}({1})", Method, Id); + Transport.call(Method, llvm::json::Value(Parameter), Id); + }; + } + +private: + template + using HandlerMap = llvm::StringMap>; + + HandlerMap NotificationHandlers; + HandlerMap)> + MethodHandlers; + + /// A pair of (1) the original request's method name, and (2) the callback + /// function to be invoked for responses. + using ResponseHandlerTy = + std::pair>; + /// A mapping from request/response ID to response handler. + llvm::StringMap ResponseHandlers; + /// Mutex to guard insertion into the response handler map. + std::mutex ResponseHandlersMutex; + + JSONTransport &Transport; + + /// Mutex to guard sending output messages to the transport. 
+ std::mutex TransportOutputMutex; +}; + +} // namespace lsp +} // namespace llvm + +#endif diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 2528e8bd1142a..7da972f372c5b 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -135,6 +135,7 @@ if (UNIX AND "${CMAKE_SYSTEM_NAME}" MATCHES "AIX") endif() add_subdirectory(BLAKE3) +add_subdirectory(LSP) add_llvm_component_library(LLVMSupport ABIBreak.cpp diff --git a/llvm/lib/Support/LSP/CMakeLists.txt b/llvm/lib/Support/LSP/CMakeLists.txt new file mode 100644 index 0000000000000..6094d9ac315c0 --- /dev/null +++ b/llvm/lib/Support/LSP/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_component_library(LLVMSupportLSP + Protocol.cpp + Transport.cpp + Logging.cpp + + DEPENDS + LLVMSupport +) diff --git a/mlir/lib/Tools/lsp-server-support/Logging.cpp b/llvm/lib/Support/LSP/Logging.cpp similarity index 55% rename from mlir/lib/Tools/lsp-server-support/Logging.cpp rename to llvm/lib/Support/LSP/Logging.cpp index 373e2165c244d..b36621ae1c6c6 100644 --- a/mlir/lib/Tools/lsp-server-support/Logging.cpp +++ b/llvm/lib/Support/LSP/Logging.cpp @@ -6,36 +6,36 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Tools/lsp-server-support/Logging.h" +#include "llvm/Support/LSP/Logging.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/raw_ostream.h" -using namespace mlir; -using namespace mlir::lsp; +using namespace llvm; +using namespace llvm::lsp; -void Logger::setLogLevel(Level logLevel) { get().logLevel = logLevel; } +void Logger::setLogLevel(Level LogLevel) { get().LogLevel = LogLevel; } Logger &Logger::get() { - static Logger logger; - return logger; + static Logger Logger; + return Logger; } -void Logger::log(Level logLevel, const char *fmt, - const llvm::formatv_object_base &message) { - Logger &logger = get(); +void Logger::log(Level LogLevel, const char *Fmt, + const llvm::formatv_object_base &Message) { + Logger 
&Logger = get(); // Ignore messages with log levels below the current setting in the logger. - if (logLevel < logger.logLevel) + if (LogLevel < Logger.LogLevel) return; // An indicator character for each log level. - const char *logLevelIndicators = "DIE"; + const char *LogLevelIndicators = "DIE"; // Format the message and print to errs. - llvm::sys::TimePoint<> timestamp = std::chrono::system_clock::now(); - std::lock_guard logGuard(logger.mutex); + llvm::sys::TimePoint<> Timestamp = std::chrono::system_clock::now(); + std::lock_guard LogGuard(Logger.Mutex); llvm::errs() << llvm::formatv( "{0}[{1:%H:%M:%S.%L}] {2}\n", - logLevelIndicators[static_cast(logLevel)], timestamp, message); + LogLevelIndicators[static_cast(LogLevel)], Timestamp, Message); llvm::errs().flush(); } diff --git a/llvm/lib/Support/LSP/Protocol.cpp b/llvm/lib/Support/LSP/Protocol.cpp new file mode 100644 index 0000000000000..f22126345a435 --- /dev/null +++ b/llvm/lib/Support/LSP/Protocol.cpp @@ -0,0 +1,1043 @@ +//===--- Protocol.cpp - Language Server Protocol Implementation -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the serialization code for the LSP structs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LSP/Protocol.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::lsp; + +// Helper that doesn't treat `null` and absent fields as failures. 
+template +static bool mapOptOrNull(const llvm::json::Value &Params, + llvm::StringLiteral Prop, T &Out, + llvm::json::Path Path) { + const llvm::json::Object *O = Params.getAsObject(); + assert(O); + + // Field is missing or null. + auto *V = O->get(Prop); + if (!V || V->getAsNull()) + return true; + return fromJSON(*V, Out, Path.field(Prop)); +} + +//===----------------------------------------------------------------------===// +// LSPError +//===----------------------------------------------------------------------===// + +char LSPError::ID; + +//===----------------------------------------------------------------------===// +// URIForFile +//===----------------------------------------------------------------------===// + +static bool isWindowsPath(StringRef Path) { + return Path.size() > 1 && llvm::isAlpha(Path[0]) && Path[1] == ':'; +} + +static bool isNetworkPath(StringRef Path) { + return Path.size() > 2 && Path[0] == Path[1] && + llvm::sys::path::is_separator(Path[0]); +} + +static bool shouldEscapeInURI(unsigned char C) { + // Unreserved characters. + if ((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9')) + return false; + + switch (C) { + case '-': + case '_': + case '.': + case '~': + // '/' is only reserved when parsing. + case '/': + // ':' is only reserved for relative URI paths, which we doesn't produce. + case ':': + return false; + } + return true; +} + +/// Encodes a string according to percent-encoding. +/// - Unreserved characters are not escaped. +/// - Reserved characters always escaped with exceptions like '/'. +/// - All other characters are escaped. +static void percentEncode(StringRef Content, std::string &Out) { + for (unsigned char C : Content) { + if (shouldEscapeInURI(C)) { + Out.push_back('%'); + Out.push_back(llvm::hexdigit(C / 16)); + Out.push_back(llvm::hexdigit(C % 16)); + } else { + Out.push_back(C); + } + } +} + +/// Decodes a string according to percent-encoding. 
+static std::string percentDecode(StringRef Content) { + std::string Result; + for (auto I = Content.begin(), E = Content.end(); I != E; ++I) { + if (*I != '%') { + Result += *I; + continue; + } + if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) && + llvm::isHexDigit(*(I + 2))) { + Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2))); + I += 2; + } else { + Result.push_back(*I); + } + } + return Result; +} + +/// Return the set containing the supported URI schemes. +static StringSet<> &getSupportedSchemes() { + static StringSet<> Schemes({"file", "test"}); + return Schemes; +} + +/// Returns true if the given scheme is structurally valid, i.e. it does not +/// contain any invalid scheme characters. This does not check that the scheme +/// is actually supported. +static bool isStructurallyValidScheme(StringRef Scheme) { + if (Scheme.empty()) + return false; + if (!llvm::isAlpha(Scheme[0])) + return false; + return llvm::all_of(llvm::drop_begin(Scheme), [](char C) { + return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-'; + }); +} + +static llvm::Expected uriFromAbsolutePath(StringRef AbsolutePath, + StringRef Scheme) { + std::string Body; + StringRef Authority; + StringRef Root = llvm::sys::path::root_name(AbsolutePath); + if (isNetworkPath(Root)) { + // Windows UNC paths e.g. \\server\share => file://server/share + Authority = Root.drop_front(2); + AbsolutePath.consume_front(Root); + } else if (isWindowsPath(Root)) { + // Windows paths e.g. X:\path => file:///X:/path + Body = "/"; + } + Body += llvm::sys::path::convert_to_slash(AbsolutePath); + + std::string Uri = Scheme.str() + ":"; + if (Authority.empty() && Body.empty()) + return Uri; + + // If authority if empty, we only print body if it starts with "/"; otherwise, + // the URI is invalid. 
+ if (!Authority.empty() || StringRef(Body).starts_with("/")) { + Uri.append("//"); + percentEncode(Authority, Uri); + } + percentEncode(Body, Uri); + return Uri; +} + +static llvm::Expected getAbsolutePath(StringRef Authority, + StringRef Body) { + if (!Body.starts_with("/")) + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "File scheme: expect body to be an absolute path starting " + "with '/': " + + Body); + SmallString<128> Path; + if (!Authority.empty()) { + // Windows UNC paths e.g. file://server/share => \\server\share + ("//" + Authority).toVector(Path); + } else if (isWindowsPath(Body.substr(1))) { + // Windows paths e.g. file:///X:/path => X:\path + Body.consume_front("/"); + } + Path.append(Body); + llvm::sys::path::native(Path); + return std::string(Path); +} + +static llvm::Expected parseFilePathFromURI(StringRef OrigUri) { + StringRef Uri = OrigUri; + + // Decode the scheme of the URI. + size_t Pos = Uri.find(':'); + if (Pos == StringRef::npos) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "Scheme must be provided in URI: " + + OrigUri); + StringRef SchemeStr = Uri.substr(0, Pos); + std::string UriScheme = percentDecode(SchemeStr); + if (!isStructurallyValidScheme(UriScheme)) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "Invalid scheme: " + SchemeStr + + " (decoded: " + UriScheme + ")"); + Uri = Uri.substr(Pos + 1); + + // Decode the authority of the URI. + std::string UriAuthority; + if (Uri.consume_front("//")) { + Pos = Uri.find('/'); + UriAuthority = percentDecode(Uri.substr(0, Pos)); + Uri = Uri.substr(Pos); + } + + // Decode the body of the URI. + std::string UriBody = percentDecode(Uri); + + // Compute the absolute path for this uri. 
+ if (!getSupportedSchemes().contains(UriScheme)) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "unsupported URI scheme `" + UriScheme + + "' for workspace files"); + } + return getAbsolutePath(UriAuthority, UriBody); +} + +llvm::Expected URIForFile::fromURI(StringRef Uri) { + llvm::Expected FilePath = parseFilePathFromURI(Uri); + if (!FilePath) + return FilePath.takeError(); + return URIForFile(std::move(*FilePath), Uri.str()); +} + +llvm::Expected URIForFile::fromFile(StringRef AbsoluteFilepath, + StringRef Scheme) { + llvm::Expected Uri = + uriFromAbsolutePath(AbsoluteFilepath, Scheme); + if (!Uri) + return Uri.takeError(); + return fromURI(*Uri); +} + +StringRef URIForFile::scheme() const { return uri().split(':').first; } + +void URIForFile::registerSupportedScheme(StringRef Scheme) { + getSupportedSchemes().insert(Scheme); +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, URIForFile &Result, + llvm::json::Path Path) { + if (std::optional Str = Value.getAsString()) { + llvm::Expected ExpectedUri = URIForFile::fromURI(*Str); + if (!ExpectedUri) { + Path.report("unresolvable URI"); + consumeError(ExpectedUri.takeError()); + return false; + } + Result = std::move(*ExpectedUri); + return true; + } + return false; +} + +llvm::json::Value llvm::lsp::toJSON(const URIForFile &Value) { + return Value.uri(); +} + +raw_ostream &llvm::lsp::operator<<(raw_ostream &Os, const URIForFile &Value) { + return Os << Value.uri(); +} + +//===----------------------------------------------------------------------===// +// ClientCapabilities +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + ClientCapabilities &Result, llvm::json::Path Path) { + const llvm::json::Object *O = Value.getAsObject(); + if (!O) { + Path.report("expected object"); + return false; + } + if (const llvm::json::Object *TextDocument = O->getObject("textDocument")) { + if (const 
llvm::json::Object *DocumentSymbol = + TextDocument->getObject("documentSymbol")) { + if (std::optional HierarchicalSupport = + DocumentSymbol->getBoolean("hierarchicalDocumentSymbolSupport")) + Result.hierarchicalDocumentSymbol = *HierarchicalSupport; + } + if (auto *CodeAction = TextDocument->getObject("codeAction")) { + if (CodeAction->getObject("codeActionLiteralSupport")) + Result.codeActionStructure = true; + } + } + if (auto *Window = O->getObject("window")) { + if (std::optional WorkDoneProgressSupport = + Window->getBoolean("workDoneProgress")) + Result.workDoneProgress = *WorkDoneProgressSupport; + } + return true; +} + +//===----------------------------------------------------------------------===// +// ClientInfo +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, ClientInfo &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + if (!O || !O.map("name", Result.name)) + return false; + + // Don't fail if we can't parse version. + O.map("version", Result.version); + return true; +} + +//===----------------------------------------------------------------------===// +// InitializeParams +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, TraceLevel &Result, + llvm::json::Path Path) { + if (std::optional Str = Value.getAsString()) { + if (*Str == "off") { + Result = TraceLevel::Off; + return true; + } + if (*Str == "messages") { + Result = TraceLevel::Messages; + return true; + } + if (*Str == "verbose") { + Result = TraceLevel::Verbose; + return true; + } + } + return false; +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + InitializeParams &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + if (!O) + return false; + // We deliberately don't fail if we can't parse individual fields. 
+ O.map("capabilities", Result.capabilities); + O.map("trace", Result.trace); + mapOptOrNull(Value, "clientInfo", Result.clientInfo, Path); + + return true; +} + +//===----------------------------------------------------------------------===// +// TextDocumentItem +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + TextDocumentItem &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("uri", Result.uri) && + O.map("languageId", Result.languageId) && O.map("text", Result.text) && + O.map("version", Result.version); +} + +//===----------------------------------------------------------------------===// +// TextDocumentIdentifier +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const TextDocumentIdentifier &Value) { + return llvm::json::Object{{"uri", Value.uri}}; +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + TextDocumentIdentifier &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("uri", Result.uri); +} + +//===----------------------------------------------------------------------===// +// VersionedTextDocumentIdentifier +//===----------------------------------------------------------------------===// + +llvm::json::Value +llvm::lsp::toJSON(const VersionedTextDocumentIdentifier &Value) { + return llvm::json::Object{ + {"uri", Value.uri}, + {"version", Value.version}, + }; +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + VersionedTextDocumentIdentifier &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("uri", Result.uri) && O.map("version", Result.version); +} + +//===----------------------------------------------------------------------===// +// Position +//===----------------------------------------------------------------------===// + 
/// Position: zero-based line/character pair, as defined by the LSP spec.
bool llvm::lsp::fromJSON(const llvm::json::Value &Value, Position &Result,
                         llvm::json::Path Path) {
  llvm::json::ObjectMapper O(Value, Path);
  return O && O.map("line", Result.line) &&
         O.map("character", Result.character);
}

llvm::json::Value llvm::lsp::toJSON(const Position &Value) {
  return llvm::json::Object{
      {"line", Value.line},
      {"character", Value.character},
  };
}

/// Prints as "line:character".
raw_ostream &llvm::lsp::operator<<(raw_ostream &Os, const Position &Value) {
  return Os << Value.line << ':' << Value.character;
}

//===----------------------------------------------------------------------===//
// Range
//===----------------------------------------------------------------------===//

bool llvm::lsp::fromJSON(const llvm::json::Value &Value, Range &Result,
                         llvm::json::Path Path) {
  llvm::json::ObjectMapper O(Value, Path);
  return O && O.map("start", Result.start) && O.map("end", Result.end);
}

llvm::json::Value llvm::lsp::toJSON(const Range &Value) {
  return llvm::json::Object{
      {"start", Value.start},
      {"end", Value.end},
  };
}

/// Prints as "start-end".
raw_ostream &llvm::lsp::operator<<(raw_ostream &Os, const Range &Value) {
  return Os << Value.start << '-' << Value.end;
}

//===----------------------------------------------------------------------===//
// Location
//===----------------------------------------------------------------------===//

bool llvm::lsp::fromJSON(const llvm::json::Value &Value, Location &Result,
                         llvm::json::Path Path) {
  llvm::json::ObjectMapper O(Value, Path);
  return O && O.map("uri", Result.uri) && O.map("range", Result.range);
}

llvm::json::Value llvm::lsp::toJSON(const Location &Value) {
  return llvm::json::Object{
      {"uri", Value.uri},
      {"range", Value.range},
  };
}

/// Prints as "range@uri".
raw_ostream &llvm::lsp::operator<<(raw_ostream &Os, const Location &Value) {
  return Os << Value.range << '@' << Value.uri;
}

//===----------------------------------------------------------------------===//
// TextDocumentPositionParams
//===----------------------------------------------------------------------===//

bool llvm::lsp::fromJSON(const llvm::json::Value &Value,
                         TextDocumentPositionParams &Result,
                         llvm::json::Path Path) {
  llvm::json::ObjectMapper O(Value, Path);
  return O && O.map("textDocument", Result.textDocument) &&
         O.map("position", Result.position);
}

//===----------------------------------------------------------------------===//
// ReferenceParams
//===----------------------------------------------------------------------===//

bool llvm::lsp::fromJSON(const llvm::json::Value &Value,
                         ReferenceContext &Result, llvm::json::Path Path) {
  llvm::json::ObjectMapper O(Value, Path);
  return O && O.mapOptional("includeDeclaration", Result.includeDeclaration);
}

bool llvm::lsp::fromJSON(const llvm::json::Value &Value,
                         ReferenceParams &Result, llvm::json::Path Path) {
  // Parse the inherited TextDocumentPositionParams fields first, then the
  // reference-specific "context" member.
  TextDocumentPositionParams &Base = Result;
  llvm::json::ObjectMapper O(Value, Path);
  return fromJSON(Value, Base, Path) && O &&
         O.mapOptional("context", Result.context);
}

//===----------------------------------------------------------------------===//
// DidOpenTextDocumentParams
//===----------------------------------------------------------------------===//

bool llvm::lsp::fromJSON(const llvm::json::Value &Value,
                         DidOpenTextDocumentParams &Result,
                         llvm::json::Path Path) {
  llvm::json::ObjectMapper O(Value, Path);
  return O && O.map("textDocument", Result.textDocument);
}

//===----------------------------------------------------------------------===//
// DidCloseTextDocumentParams
//===----------------------------------------------------------------------===//

bool llvm::lsp::fromJSON(const llvm::json::Value &Value,
                         DidCloseTextDocumentParams &Result,
                         llvm::json::Path Path) {
  llvm::json::ObjectMapper O(Value, Path);
  return O && O.map("textDocument", Result.textDocument);
}

//===----------------------------------------------------------------------===//
+// DidChangeTextDocumentParams +//===----------------------------------------------------------------------===// + +LogicalResult +TextDocumentContentChangeEvent::applyTo(std::string &Contents) const { + // If there is no range, the full document changed. + if (!range) { + Contents = text; + return success(); + } + + // Try to map the replacement range to the content. + llvm::SourceMgr TmpScrMgr; + TmpScrMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(Contents), + SMLoc()); + SMRange RangeLoc = range->getAsSMRange(TmpScrMgr); + if (!RangeLoc.isValid()) + return failure(); + + Contents.replace(RangeLoc.Start.getPointer() - Contents.data(), + RangeLoc.End.getPointer() - RangeLoc.Start.getPointer(), + text); + return success(); +} + +LogicalResult TextDocumentContentChangeEvent::applyTo( + ArrayRef Changes, std::string &Contents) { + for (const auto &Change : Changes) + if (failed(Change.applyTo(Contents))) + return failure(); + return success(); +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + TextDocumentContentChangeEvent &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("range", Result.range) && + O.map("rangeLength", Result.rangeLength) && O.map("text", Result.text); +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + DidChangeTextDocumentParams &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("textDocument", Result.textDocument) && + O.map("contentChanges", Result.contentChanges); +} + +//===----------------------------------------------------------------------===// +// MarkupContent +//===----------------------------------------------------------------------===// + +static llvm::StringRef toTextKind(MarkupKind Kind) { + switch (Kind) { + case MarkupKind::PlainText: + return "plaintext"; + case MarkupKind::Markdown: + return "markdown"; + } + llvm_unreachable("Invalid MarkupKind"); +} + +raw_ostream 
&llvm::lsp::operator<<(raw_ostream &Os, MarkupKind Kind) { + return Os << toTextKind(Kind); +} + +llvm::json::Value llvm::lsp::toJSON(const MarkupContent &Mc) { + if (Mc.value.empty()) + return nullptr; + + return llvm::json::Object{ + {"kind", toTextKind(Mc.kind)}, + {"value", Mc.value}, + }; +} + +//===----------------------------------------------------------------------===// +// Hover +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const Hover &Hover) { + llvm::json::Object Result{{"contents", toJSON(Hover.contents)}}; + if (Hover.range) + Result["range"] = toJSON(*Hover.range); + return std::move(Result); +} + +//===----------------------------------------------------------------------===// +// DocumentSymbol +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const DocumentSymbol &Symbol) { + llvm::json::Object Result{{"name", Symbol.name}, + {"kind", static_cast(Symbol.kind)}, + {"range", Symbol.range}, + {"selectionRange", Symbol.selectionRange}}; + + if (!Symbol.detail.empty()) + Result["detail"] = Symbol.detail; + if (!Symbol.children.empty()) + Result["children"] = Symbol.children; + return std::move(Result); +} + +//===----------------------------------------------------------------------===// +// DocumentSymbolParams +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + DocumentSymbolParams &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("textDocument", Result.textDocument); +} + +//===----------------------------------------------------------------------===// +// DiagnosticRelatedInformation +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + DiagnosticRelatedInformation &Result, + 
llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("location", Result.location) && + O.map("message", Result.message); +} + +llvm::json::Value llvm::lsp::toJSON(const DiagnosticRelatedInformation &Info) { + return llvm::json::Object{ + {"location", Info.location}, + {"message", Info.message}, + }; +} + +//===----------------------------------------------------------------------===// +// Diagnostic +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(DiagnosticTag Tag) { + return static_cast(Tag); +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, DiagnosticTag &Result, + llvm::json::Path Path) { + if (std::optional I = Value.getAsInteger()) { + Result = (DiagnosticTag)*I; + return true; + } + + return false; +} + +llvm::json::Value llvm::lsp::toJSON(const Diagnostic &Diag) { + llvm::json::Object Result{ + {"range", Diag.range}, + {"severity", (int)Diag.severity}, + {"message", Diag.message}, + }; + if (Diag.category) + Result["category"] = *Diag.category; + if (!Diag.source.empty()) + Result["source"] = Diag.source; + if (Diag.relatedInformation) + Result["relatedInformation"] = *Diag.relatedInformation; + if (!Diag.tags.empty()) + Result["tags"] = Diag.tags; + return std::move(Result); +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, Diagnostic &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + if (!O) + return false; + int Severity = 0; + if (!mapOptOrNull(Value, "severity", Severity, Path)) + return false; + Result.severity = (DiagnosticSeverity)Severity; + + return O.map("range", Result.range) && O.map("message", Result.message) && + mapOptOrNull(Value, "category", Result.category, Path) && + mapOptOrNull(Value, "source", Result.source, Path) && + mapOptOrNull(Value, "relatedInformation", Result.relatedInformation, + Path) && + mapOptOrNull(Value, "tags", Result.tags, Path); +} + 
+//===----------------------------------------------------------------------===// +// PublishDiagnosticsParams +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const PublishDiagnosticsParams &Params) { + return llvm::json::Object{ + {"uri", Params.uri}, + {"diagnostics", Params.diagnostics}, + {"version", Params.version}, + }; +} + +//===----------------------------------------------------------------------===// +// TextEdit +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, TextEdit &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("range", Result.range) && O.map("newText", Result.newText); +} + +llvm::json::Value llvm::lsp::toJSON(const TextEdit &Value) { + return llvm::json::Object{ + {"range", Value.range}, + {"newText", Value.newText}, + }; +} + +raw_ostream &llvm::lsp::operator<<(raw_ostream &Os, const TextEdit &Value) { + Os << Value.range << " => \""; + llvm::printEscapedString(Value.newText, Os); + return Os << '"'; +} + +//===----------------------------------------------------------------------===// +// CompletionItemKind +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + CompletionItemKind &Result, llvm::json::Path Path) { + if (std::optional IntValue = Value.getAsInteger()) { + if (*IntValue < static_cast(CompletionItemKind::Text) || + *IntValue > static_cast(CompletionItemKind::TypeParameter)) + return false; + Result = static_cast(*IntValue); + return true; + } + return false; +} + +CompletionItemKind llvm::lsp::adjustKindToCapability( + CompletionItemKind Kind, + CompletionItemKindBitset &SupportedCompletionItemKinds) { + size_t KindVal = static_cast(Kind); + if (KindVal >= kCompletionItemKindMin && + KindVal <= SupportedCompletionItemKinds.size() && 
+ SupportedCompletionItemKinds[KindVal]) + return Kind; + + // Provide some fall backs for common kinds that are close enough. + switch (Kind) { + case CompletionItemKind::Folder: + return CompletionItemKind::File; + case CompletionItemKind::EnumMember: + return CompletionItemKind::Enum; + case CompletionItemKind::Struct: + return CompletionItemKind::Class; + default: + return CompletionItemKind::Text; + } +} + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + CompletionItemKindBitset &Result, + llvm::json::Path Path) { + if (const llvm::json::Array *ArrayValue = Value.getAsArray()) { + for (size_t I = 0, E = ArrayValue->size(); I < E; ++I) { + CompletionItemKind KindOut; + if (fromJSON((*ArrayValue)[I], KindOut, Path.index(I))) + Result.set(size_t(KindOut)); + } + return true; + } + return false; +} + +//===----------------------------------------------------------------------===// +// CompletionItem +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const CompletionItem &Value) { + assert(!Value.label.empty() && "completion item label is required"); + llvm::json::Object Result{{"label", Value.label}}; + if (Value.kind != CompletionItemKind::Missing) + Result["kind"] = static_cast(Value.kind); + if (!Value.detail.empty()) + Result["detail"] = Value.detail; + if (Value.documentation) + Result["documentation"] = Value.documentation; + if (!Value.sortText.empty()) + Result["sortText"] = Value.sortText; + if (!Value.filterText.empty()) + Result["filterText"] = Value.filterText; + if (!Value.insertText.empty()) + Result["insertText"] = Value.insertText; + if (Value.insertTextFormat != InsertTextFormat::Missing) + Result["insertTextFormat"] = static_cast(Value.insertTextFormat); + if (Value.textEdit) + Result["textEdit"] = *Value.textEdit; + if (!Value.additionalTextEdits.empty()) { + Result["additionalTextEdits"] = + llvm::json::Array(Value.additionalTextEdits); + } + if (Value.deprecated) 
+ Result["deprecated"] = Value.deprecated; + return std::move(Result); +} + +raw_ostream &llvm::lsp::operator<<(raw_ostream &Os, + const CompletionItem &Value) { + return Os << Value.label << " - " << toJSON(Value); +} + +bool llvm::lsp::operator<(const CompletionItem &Lhs, + const CompletionItem &Rhs) { + return (Lhs.sortText.empty() ? Lhs.label : Lhs.sortText) < + (Rhs.sortText.empty() ? Rhs.label : Rhs.sortText); +} + +//===----------------------------------------------------------------------===// +// CompletionList +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const CompletionList &Value) { + return llvm::json::Object{ + {"isIncomplete", Value.isIncomplete}, + {"items", llvm::json::Array(Value.items)}, + }; +} + +//===----------------------------------------------------------------------===// +// CompletionContext +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + CompletionContext &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + int TriggerKind; + if (!O || !O.map("triggerKind", TriggerKind) || + !mapOptOrNull(Value, "triggerCharacter", Result.triggerCharacter, Path)) + return false; + Result.triggerKind = static_cast(TriggerKind); + return true; +} + +//===----------------------------------------------------------------------===// +// CompletionParams +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + CompletionParams &Result, llvm::json::Path Path) { + if (!fromJSON(Value, static_cast(Result), Path)) + return false; + if (const llvm::json::Value *Context = Value.getAsObject()->get("context")) + return fromJSON(*Context, Result.context, Path.field("context")); + return true; +} + +//===----------------------------------------------------------------------===// +// 
ParameterInformation +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const ParameterInformation &Value) { + assert((Value.labelOffsets || !Value.labelString.empty()) && + "parameter information label is required"); + llvm::json::Object Result; + if (Value.labelOffsets) + Result["label"] = llvm::json::Array( + {Value.labelOffsets->first, Value.labelOffsets->second}); + else + Result["label"] = Value.labelString; + if (!Value.documentation.empty()) + Result["documentation"] = Value.documentation; + return std::move(Result); +} + +//===----------------------------------------------------------------------===// +// SignatureInformation +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const SignatureInformation &Value) { + assert(!Value.label.empty() && "signature information label is required"); + llvm::json::Object Result{ + {"label", Value.label}, + {"parameters", llvm::json::Array(Value.parameters)}, + }; + if (!Value.documentation.empty()) + Result["documentation"] = Value.documentation; + return std::move(Result); +} + +raw_ostream &llvm::lsp::operator<<(raw_ostream &Os, + const SignatureInformation &Value) { + return Os << Value.label << " - " << toJSON(Value); +} + +//===----------------------------------------------------------------------===// +// SignatureHelp +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const SignatureHelp &Value) { + assert(Value.activeSignature >= 0 && + "Unexpected negative value for number of active signatures."); + assert(Value.activeParameter >= 0 && + "Unexpected negative value for active parameter index"); + return llvm::json::Object{ + {"activeSignature", Value.activeSignature}, + {"activeParameter", Value.activeParameter}, + {"signatures", llvm::json::Array(Value.signatures)}, + }; +} + 
+//===----------------------------------------------------------------------===// +// DocumentLinkParams +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + DocumentLinkParams &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("textDocument", Result.textDocument); +} + +//===----------------------------------------------------------------------===// +// DocumentLink +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const DocumentLink &Value) { + return llvm::json::Object{ + {"range", Value.range}, + {"target", Value.target}, + }; +} + +//===----------------------------------------------------------------------===// +// InlayHintsParams +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + InlayHintsParams &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("textDocument", Result.textDocument) && + O.map("range", Result.range); +} + +//===----------------------------------------------------------------------===// +// InlayHint +//===----------------------------------------------------------------------===// + +llvm::json::Value llvm::lsp::toJSON(const InlayHint &Value) { + return llvm::json::Object{{"position", Value.position}, + {"kind", (int)Value.kind}, + {"label", Value.label}, + {"paddingLeft", Value.paddingLeft}, + {"paddingRight", Value.paddingRight}}; +} +bool llvm::lsp::operator==(const InlayHint &Lhs, const InlayHint &Rhs) { + return std::tie(Lhs.position, Lhs.kind, Lhs.label) == + std::tie(Rhs.position, Rhs.kind, Rhs.label); +} +bool llvm::lsp::operator<(const InlayHint &Lhs, const InlayHint &Rhs) { + return std::tie(Lhs.position, Lhs.kind, Lhs.label) < + std::tie(Rhs.position, Rhs.kind, Rhs.label); +} + 
+llvm::raw_ostream &llvm::lsp::operator<<(llvm::raw_ostream &Os, + InlayHintKind Value) { + switch (Value) { + case InlayHintKind::Parameter: + return Os << "parameter"; + case InlayHintKind::Type: + return Os << "type"; + } + llvm_unreachable("Unknown InlayHintKind"); +} + +//===----------------------------------------------------------------------===// +// CodeActionContext +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + CodeActionContext &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + if (!O || !O.map("diagnostics", Result.diagnostics)) + return false; + O.map("only", Result.only); + return true; +} + +//===----------------------------------------------------------------------===// +// CodeActionParams +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, + CodeActionParams &Result, llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("textDocument", Result.textDocument) && + O.map("range", Result.range) && O.map("context", Result.context); +} + +//===----------------------------------------------------------------------===// +// WorkspaceEdit +//===----------------------------------------------------------------------===// + +bool llvm::lsp::fromJSON(const llvm::json::Value &Value, WorkspaceEdit &Result, + llvm::json::Path Path) { + llvm::json::ObjectMapper O(Value, Path); + return O && O.map("changes", Result.changes); +} + +llvm::json::Value llvm::lsp::toJSON(const WorkspaceEdit &Value) { + llvm::json::Object FileChanges; + for (auto &Change : Value.changes) + FileChanges[Change.first] = llvm::json::Array(Change.second); + return llvm::json::Object{{"changes", std::move(FileChanges)}}; +} + +//===----------------------------------------------------------------------===// +// CodeAction 
+//===----------------------------------------------------------------------===// + +const llvm::StringLiteral CodeAction::kQuickFix = "quickfix"; +const llvm::StringLiteral CodeAction::kRefactor = "refactor"; +const llvm::StringLiteral CodeAction::kInfo = "info"; + +llvm::json::Value llvm::lsp::toJSON(const CodeAction &Value) { + llvm::json::Object CodeAction{{"title", Value.title}}; + if (Value.kind) + CodeAction["kind"] = *Value.kind; + if (Value.diagnostics) + CodeAction["diagnostics"] = llvm::json::Array(*Value.diagnostics); + if (Value.isPreferred) + CodeAction["isPreferred"] = true; + if (Value.edit) + CodeAction["edit"] = *Value.edit; + return std::move(CodeAction); +} diff --git a/llvm/lib/Support/LSP/Transport.cpp b/llvm/lib/Support/LSP/Transport.cpp new file mode 100644 index 0000000000000..e71f17701636b --- /dev/null +++ b/llvm/lib/Support/LSP/Transport.cpp @@ -0,0 +1,369 @@ +//===--- JSONTransport.cpp - sending and receiving LSP messages over JSON -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LSP/Transport.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Protocol.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::lsp; + +//===----------------------------------------------------------------------===// +// Reply +//===----------------------------------------------------------------------===// + +namespace { +/// Function object to reply to an LSP call. 
+/// Each instance must be called exactly once, otherwise: +/// - if there was no reply, an error reply is sent +/// - if there were multiple replies, only the first is sent +class Reply { +public: + Reply(const llvm::json::Value &Id, StringRef Method, JSONTransport &Transport, + std::mutex &TransportOutputMutex); + Reply(Reply &&Other); + Reply &operator=(Reply &&) = delete; + Reply(const Reply &) = delete; + Reply &operator=(const Reply &) = delete; + + void operator()(llvm::Expected Reply); + +private: + std::string Method; + std::atomic Replied = {false}; + llvm::json::Value Id; + JSONTransport *Transport; + std::mutex &TransportOutputMutex; +}; +} // namespace + +Reply::Reply(const llvm::json::Value &Id, llvm::StringRef Method, + JSONTransport &Transport, std::mutex &TransportOutputMutex) + : Method(Method), Id(Id), Transport(&Transport), + TransportOutputMutex(TransportOutputMutex) {} + +Reply::Reply(Reply &&Other) + : Method(Other.Method), Replied(Other.Replied.load()), + Id(std::move(Other.Id)), Transport(Other.Transport), + TransportOutputMutex(Other.TransportOutputMutex) { + Other.Transport = nullptr; +} + +void Reply::operator()(llvm::Expected Reply) { + if (Replied.exchange(true)) { + Logger::error("Replied twice to message {0}({1})", Method, Id); + assert(false && "must reply to each call only once!"); + return; + } + assert(Transport && "expected valid transport to reply to"); + + std::lock_guard TransportLock(TransportOutputMutex); + if (Reply) { + Logger::info("--> reply:{0}({1})", Method, Id); + Transport->reply(std::move(Id), std::move(Reply)); + } else { + llvm::Error Error = Reply.takeError(); + Logger::info("--> reply:{0}({1}): {2}", Method, Id, Error); + Transport->reply(std::move(Id), std::move(Error)); + } +} + +//===----------------------------------------------------------------------===// +// MessageHandler +//===----------------------------------------------------------------------===// + +bool MessageHandler::onNotify(llvm::StringRef 
Method, llvm::json::Value Value) { + Logger::info("--> {0}", Method); + + if (Method == "exit") + return false; + if (Method == "$cancel") { + // TODO: Add support for cancelling requests. + } else { + auto It = NotificationHandlers.find(Method); + if (It != NotificationHandlers.end()) + It->second(std::move(Value)); + } + return true; +} + +bool MessageHandler::onCall(llvm::StringRef Method, llvm::json::Value Params, + llvm::json::Value Id) { + Logger::info("--> {0}({1})", Method, Id); + + Reply Reply(Id, Method, Transport, TransportOutputMutex); + + auto It = MethodHandlers.find(Method); + if (It != MethodHandlers.end()) { + It->second(std::move(Params), std::move(Reply)); + } else { + Reply(llvm::make_error("method not found: " + Method.str(), + ErrorCode::MethodNotFound)); + } + return true; +} + +bool MessageHandler::onReply(llvm::json::Value Id, + llvm::Expected Result) { + // Find the response handler in the mapping. If it exists, move it out of the + // mapping and erase it. + ResponseHandlerTy ResponseHandler; + { + std::lock_guard responseHandlersLock(ResponseHandlerTy); + auto It = ResponseHandlers.find(debugString(Id)); + if (It != ResponseHandlers.end()) { + ResponseHandler = std::move(It->second); + ResponseHandlers.erase(It); + } + } + + // If we found a response handler, invoke it. Otherwise, log an error. + if (ResponseHandler.second) { + Logger::info("--> reply:{0}({1})", ResponseHandler.first, Id); + ResponseHandler.second(std::move(Id), std::move(Result)); + } else { + Logger::error( + "received a reply with ID {0}, but there was no such outgoing request", + Id); + if (!Result) + llvm::consumeError(Result.takeError()); + } + return true; +} + +//===----------------------------------------------------------------------===// +// JSONTransport +//===----------------------------------------------------------------------===// + +/// Encode the given error as a JSON object. 
+static llvm::json::Object encodeError(llvm::Error Error) { + std::string Message; + ErrorCode Code = ErrorCode::UnknownErrorCode; + auto HandlerFn = [&](const LSPError &LspError) -> llvm::Error { + Message = LspError.message; + Code = LspError.code; + return llvm::Error::success(); + }; + if (llvm::Error Unhandled = llvm::handleErrors(std::move(Error), HandlerFn)) + Message = llvm::toString(std::move(Unhandled)); + + return llvm::json::Object{ + {"message", std::move(Message)}, + {"code", int64_t(Code)}, + }; +} + +/// Decode the given JSON object into an error. +llvm::Error decodeError(const llvm::json::Object &O) { + StringRef Msg = O.getString("message").value_or("Unspecified error"); + if (std::optional Code = O.getInteger("code")) + return llvm::make_error(Msg.str(), ErrorCode(*Code)); + return llvm::make_error(llvm::inconvertibleErrorCode(), + Msg.str()); +} + +void JSONTransport::notify(StringRef Method, llvm::json::Value Params) { + sendMessage(llvm::json::Object{ + {"jsonrpc", "2.0"}, + {"method", Method}, + {"params", std::move(Params)}, + }); +} +void JSONTransport::call(StringRef Method, llvm::json::Value Params, + llvm::json::Value Id) { + sendMessage(llvm::json::Object{ + {"jsonrpc", "2.0"}, + {"id", std::move(Id)}, + {"method", Method}, + {"params", std::move(Params)}, + }); +} +void JSONTransport::reply(llvm::json::Value Id, + llvm::Expected Result) { + if (Result) { + return sendMessage(llvm::json::Object{ + {"jsonrpc", "2.0"}, + {"id", std::move(Id)}, + {"result", std::move(*Result)}, + }); + } + + sendMessage(llvm::json::Object{ + {"jsonrpc", "2.0"}, + {"id", std::move(Id)}, + {"error", encodeError(Result.takeError())}, + }); +} + +llvm::Error JSONTransport::run(MessageHandler &Handler) { + std::string Json; + while (!In->isEndOfInput()) { + if (In->hasError()) { + return llvm::errorCodeToError( + std::error_code(errno, std::system_category())); + } + + if (succeeded(In->readMessage(Json))) { + if (llvm::Expected Doc = llvm::json::parse(Json)) { 
+ if (!handleMessage(std::move(*Doc), Handler)) + return llvm::Error::success(); + } else { + Logger::error("JSON parse error: {0}", llvm::toString(Doc.takeError())); + } + } + } + return llvm::errorCodeToError(std::make_error_code(std::errc::io_error)); +} + +void JSONTransport::sendMessage(llvm::json::Value Msg) { + OutputBuffer.clear(); + llvm::raw_svector_ostream os(OutputBuffer); + os << llvm::formatv(PrettyOutput ? "{0:2}\n" : "{0}", Msg); + Out << "Content-Length: " << OutputBuffer.size() << "\r\n\r\n" + << OutputBuffer; + Out.flush(); + Logger::debug(">>> {0}\n", OutputBuffer); +} + +bool JSONTransport::handleMessage(llvm::json::Value Msg, + MessageHandler &Handler) { + // Message must be an object with "jsonrpc":"2.0". + llvm::json::Object *Object = Msg.getAsObject(); + if (!Object || + Object->getString("jsonrpc") != std::optional("2.0")) + return false; + + // `id` may be any JSON value. If absent, this is a notification. + std::optional Id; + if (llvm::json::Value *I = Object->get("id")) + Id = std::move(*I); + std::optional Method = Object->getString("method"); + + // This is a response. + if (!Method) { + if (!Id) + return false; + if (auto *Err = Object->getObject("error")) + return Handler.onReply(std::move(*Id), decodeError(*Err)); + // result should be given, use null if not. + llvm::json::Value Result = nullptr; + if (llvm::json::Value *R = Object->get("result")) + Result = std::move(*R); + return Handler.onReply(std::move(*Id), std::move(Result)); + } + + // Params should be given, use null if not. + llvm::json::Value Params = nullptr; + if (llvm::json::Value *P = Object->get("params")) + Params = std::move(*P); + + if (Id) + return Handler.onCall(*Method, std::move(Params), std::move(*Id)); + return Handler.onNotify(*Method, std::move(Params)); +} + +/// Tries to read a line up to and including \n. +/// If failing, feof(), ferror(), or shutdownRequested() will be set. 
+LogicalResult readLine(std::FILE *In, SmallVectorImpl &Out) { + // Big enough to hold any reasonable header line. May not fit content lines + // in delimited mode, but performance doesn't matter for that mode. + static constexpr int BufSize = 128; + size_t Size = 0; + Out.clear(); + for (;;) { + Out.resize_for_overwrite(Size + BufSize); + if (!std::fgets(&Out[Size], BufSize, In)) + return failure(); + + clearerr(In); + + // If the line contained null bytes, anything after it (including \n) will + // be ignored. Fortunately this is not a legal header or JSON. + size_t Read = std::strlen(&Out[Size]); + if (Read > 0 && Out[Size + Read - 1] == '\n') { + Out.resize(Size + Read); + return success(); + } + Size += Read; + } +} + +// Returns std::nullopt when: +// - ferror(), feof(), or shutdownRequested() are set. +// - Content-Length is missing or empty (protocol error) +LogicalResult +JSONTransportInputOverFile::readStandardMessage(std::string &Json) { + // A Language Server Protocol message starts with a set of HTTP headers, + // delimited by \r\n, and terminated by an empty line (\r\n). + unsigned long long ContentLength = 0; + llvm::SmallString<128> Line; + while (true) { + if (feof(In) || hasError() || failed(readLine(In, Line))) + return failure(); + + // Content-Length is a mandatory header, and the only one we handle. + StringRef LineRef = Line; + if (LineRef.consume_front("Content-Length: ")) { + llvm::getAsUnsignedInteger(LineRef.trim(), 0, ContentLength); + } else if (!LineRef.trim().empty()) { + // It's another header, ignore it. + continue; + } else { + // An empty line indicates the end of headers. Go ahead and read the JSON. 
+ break; + } + } + + // The fuzzer likes crashing us by sending "Content-Length: 9999999999999999" + if (ContentLength == 0 || ContentLength > 1 << 30) + return failure(); + + Json.resize(ContentLength); + for (size_t Pos = 0, Read; Pos < ContentLength; Pos += Read) { + Read = std::fread(&Json[Pos], 1, ContentLength - Pos, In); + if (Read == 0) + return failure(); + + // If we're done, the error was transient. If we're not done, either it was + // transient or we'll see it again on retry. + clearerr(In); + Pos += Read; + } + return success(); +} + +/// For lit tests we support a simplified syntax: +/// - messages are delimited by '// -----' on a line by itself +/// - lines starting with // are ignored. +/// This is a testing path, so favor simplicity over performance here. +/// When returning failure: feof(), ferror(), or shutdownRequested() will be +/// set. +LogicalResult +JSONTransportInputOverFile::readDelimitedMessage(std::string &Json) { + Json.clear(); + llvm::SmallString<128> Line; + while (succeeded(readLine(In, Line))) { + StringRef LineRef = Line.str().trim(); + if (LineRef.starts_with("//")) { + // Found a delimiter for the message. 
+ if (LineRef == "// -----") + break; + continue; + } + + Json += Line; + } + + return failure(ferror(In)); +} diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 0910a0b296dd0..d1dfb1dc4a722 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -125,6 +125,8 @@ add_llvm_unittest(SupportTests intrinsics_gen ) +add_subdirectory(LSP) + target_link_libraries(SupportTests PRIVATE LLVMTestingSupport) # Disable all warning for AlignOfTest.cpp, diff --git a/llvm/unittests/Support/LSP/CMakeLists.txt b/llvm/unittests/Support/LSP/CMakeLists.txt new file mode 100644 index 0000000000000..790a8b725469b --- /dev/null +++ b/llvm/unittests/Support/LSP/CMakeLists.txt @@ -0,0 +1,8 @@ +set(LLVM_LINK_COMPONENTS + SupportLSP +) + +add_llvm_unittest(LLVMSupportLSPTests + Protocol.cpp + Transport.cpp +) diff --git a/mlir/unittests/Tools/lsp-server-support/Protocol.cpp b/llvm/unittests/Support/LSP/Protocol.cpp similarity index 93% rename from mlir/unittests/Tools/lsp-server-support/Protocol.cpp rename to llvm/unittests/Support/LSP/Protocol.cpp index 04d7b2fbb440f..43c548c24b38b 100644 --- a/mlir/unittests/Tools/lsp-server-support/Protocol.cpp +++ b/llvm/unittests/Support/LSP/Protocol.cpp @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Tools/lsp-server-support/Protocol.h" +#include "llvm/Support/LSP/Protocol.h" #include "gtest/gtest.h" -using namespace mlir; -using namespace mlir::lsp; +using namespace llvm; +using namespace llvm::lsp; using namespace testing; namespace { diff --git a/mlir/unittests/Tools/lsp-server-support/Transport.cpp b/llvm/unittests/Support/LSP/Transport.cpp similarity index 96% rename from mlir/unittests/Tools/lsp-server-support/Transport.cpp rename to llvm/unittests/Support/LSP/Transport.cpp index 92581bd2bad08..514e93e983523 100644 --- a/mlir/unittests/Tools/lsp-server-support/Transport.cpp +++ 
b/llvm/unittests/Support/LSP/Transport.cpp @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Tools/lsp-server-support/Transport.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" +#include "llvm/Support/LSP/Transport.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Protocol.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -using namespace mlir; -using namespace mlir::lsp; +using namespace llvm; +using namespace llvm::lsp; using namespace testing; namespace { @@ -88,7 +88,7 @@ class TransportInputTest : public Test { TEST_F(TransportInputTest, RequestWithInvalidParams) { struct Handler { void onMethod(const TextDocumentItem ¶ms, - mlir::lsp::Callback callback) {} + llvm::lsp::Callback callback) {} } handler; getMessageHandler().method("invalid-params-request", &handler, &Handler::onMethod); diff --git a/mlir/include/mlir/Tools/lsp-server-support/SourceMgrUtils.h b/mlir/include/mlir/Tools/lsp-server-support/SourceMgrUtils.h index 9ed8326a602e6..920ce831e42b6 100644 --- a/mlir/include/mlir/Tools/lsp-server-support/SourceMgrUtils.h +++ b/mlir/include/mlir/Tools/lsp-server-support/SourceMgrUtils.h @@ -14,7 +14,8 @@ #ifndef MLIR_TOOLS_LSPSERVERSUPPORT_SOURCEMGRUTILS_H #define MLIR_TOOLS_LSPSERVERSUPPORT_SOURCEMGRUTILS_H -#include "mlir/Tools/lsp-server-support/Protocol.h" +#include "mlir/Support/LLVM.h" +#include "llvm/Support/LSP/Protocol.h" #include "llvm/Support/SourceMgr.h" #include @@ -45,17 +46,18 @@ bool contains(SMRange range, SMLoc loc); /// This class represents a single include within a root file. struct SourceMgrInclude { - SourceMgrInclude(const lsp::URIForFile &uri, const lsp::Range &range) + SourceMgrInclude(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Range &range) : uri(uri), range(range) {} /// Build a hover for the current include file. 
- Hover buildHover() const; + llvm::lsp::Hover buildHover() const; /// The URI of the file that is included. - lsp::URIForFile uri; + llvm::lsp::URIForFile uri; /// The range of the include directive. - lsp::Range range; + llvm::lsp::Range range; }; /// Given a source manager, gather all of the processed include files. These are diff --git a/mlir/include/mlir/Tools/lsp-server-support/Transport.h b/mlir/include/mlir/Tools/lsp-server-support/Transport.h deleted file mode 100644 index 0010a475fedd2..0000000000000 --- a/mlir/include/mlir/Tools/lsp-server-support/Transport.h +++ /dev/null @@ -1,283 +0,0 @@ -//===--- Transport.h - Sending and Receiving LSP messages -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// The language server protocol is usually implemented by writing messages as -// JSON-RPC over the stdin/stdout of a subprocess. This file contains a JSON -// transport interface that handles this communication. 
-// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_TOOLS_LSPSERVERSUPPORT_TRANSPORT_H -#define MLIR_TOOLS_LSPSERVERSUPPORT_TRANSPORT_H - -#include "mlir/Support/DebugStringHelper.h" -#include "mlir/Support/LLVM.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" -#include "llvm/ADT/FunctionExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/FormatAdapters.h" -#include "llvm/Support/JSON.h" -#include "llvm/Support/raw_ostream.h" -#include - -namespace mlir { -namespace lsp { -class MessageHandler; - -//===----------------------------------------------------------------------===// -// JSONTransport -//===----------------------------------------------------------------------===// - -/// The encoding style of the JSON-RPC messages (both input and output). -enum JSONStreamStyle { - /// Encoding per the LSP specification, with mandatory Content-Length header. - Standard, - /// Messages are delimited by a '// -----' line. Comment lines start with //. - Delimited -}; - -/// An abstract class used by the JSONTransport to read JSON message. -class JSONTransportInput { -public: - explicit JSONTransportInput(JSONStreamStyle style = JSONStreamStyle::Standard) - : style(style) {} - virtual ~JSONTransportInput() = default; - - virtual bool hasError() const = 0; - virtual bool isEndOfInput() const = 0; - - /// Read in a message from the input stream. - LogicalResult readMessage(std::string &json) { - return style == JSONStreamStyle::Delimited ? readDelimitedMessage(json) - : readStandardMessage(json); - } - virtual LogicalResult readDelimitedMessage(std::string &json) = 0; - virtual LogicalResult readStandardMessage(std::string &json) = 0; - -private: - /// The JSON stream style to use. - JSONStreamStyle style; -}; - -/// Concrete implementation of the JSONTransportInput that reads from a file. 
-class JSONTransportInputOverFile : public JSONTransportInput { -public: - explicit JSONTransportInputOverFile( - std::FILE *in, JSONStreamStyle style = JSONStreamStyle::Standard) - : JSONTransportInput(style), in(in) {} - - bool hasError() const final { return ferror(in); } - bool isEndOfInput() const final { return feof(in); } - - LogicalResult readDelimitedMessage(std::string &json) final; - LogicalResult readStandardMessage(std::string &json) final; - -private: - std::FILE *in; -}; - -/// A transport class that performs the JSON-RPC communication with the LSP -/// client. -class JSONTransport { -public: - JSONTransport(std::unique_ptr in, raw_ostream &out, - bool prettyOutput = false) - : in(std::move(in)), out(out), prettyOutput(prettyOutput) {} - - JSONTransport(std::FILE *in, raw_ostream &out, - JSONStreamStyle style = JSONStreamStyle::Standard, - bool prettyOutput = false) - : in(std::make_unique(in, style)), out(out), - prettyOutput(prettyOutput) {} - - /// The following methods are used to send a message to the LSP client. - void notify(StringRef method, llvm::json::Value params); - void call(StringRef method, llvm::json::Value params, llvm::json::Value id); - void reply(llvm::json::Value id, llvm::Expected result); - - /// Start executing the JSON-RPC transport. - llvm::Error run(MessageHandler &handler); - -private: - /// Dispatches the given incoming json message to the message handler. - bool handleMessage(llvm::json::Value msg, MessageHandler &handler); - /// Writes the given message to the output stream. - void sendMessage(llvm::json::Value msg); - -private: - /// The input to read a message from. - std::unique_ptr in; - SmallVector outputBuffer; - /// The output file stream. - raw_ostream &out; - /// If the output JSON should be formatted for easier readability. 
- bool prettyOutput; -}; - -//===----------------------------------------------------------------------===// -// MessageHandler -//===----------------------------------------------------------------------===// - -/// A Callback is a void function that accepts Expected. This is -/// accepted by functions that logically return T. -template -using Callback = llvm::unique_function)>; - -/// An OutgoingNotification is a function used for outgoing notifications -/// send to the client. -template -using OutgoingNotification = llvm::unique_function; - -/// An OutgoingRequest is a function used for outgoing requests to send to -/// the client. -template -using OutgoingRequest = - llvm::unique_function; - -/// An `OutgoingRequestCallback` is invoked when an outgoing request to the -/// client receives a response in turn. It is passed the original request's ID, -/// as well as the response result. -template -using OutgoingRequestCallback = - std::function)>; - -/// A handler used to process the incoming transport messages. -class MessageHandler { -public: - MessageHandler(JSONTransport &transport) : transport(transport) {} - - bool onNotify(StringRef method, llvm::json::Value value); - bool onCall(StringRef method, llvm::json::Value params, llvm::json::Value id); - bool onReply(llvm::json::Value id, llvm::Expected result); - - template - static llvm::Expected parse(const llvm::json::Value &raw, - StringRef payloadName, StringRef payloadKind) { - T result; - llvm::json::Path::Root root; - if (fromJSON(raw, result, root)) - return std::move(result); - - // Dump the relevant parts of the broken message. - std::string context; - llvm::raw_string_ostream os(context); - root.printErrorContext(raw, os); - - // Report the error (e.g. to the client). 
- return llvm::make_error( - llvm::formatv("failed to decode {0} {1}: {2}", payloadName, payloadKind, - fmt_consume(root.getError())), - ErrorCode::InvalidParams); - } - - template - void method(llvm::StringLiteral method, ThisT *thisPtr, - void (ThisT::*handler)(const Param &, Callback)) { - methodHandlers[method] = [method, handler, - thisPtr](llvm::json::Value rawParams, - Callback reply) { - llvm::Expected param = parse(rawParams, method, "request"); - if (!param) - return reply(param.takeError()); - (thisPtr->*handler)(*param, std::move(reply)); - }; - } - - template - void notification(llvm::StringLiteral method, ThisT *thisPtr, - void (ThisT::*handler)(const Param &)) { - notificationHandlers[method] = [method, handler, - thisPtr](llvm::json::Value rawParams) { - llvm::Expected param = - parse(rawParams, method, "notification"); - if (!param) { - return llvm::consumeError( - llvm::handleErrors(param.takeError(), [](const LSPError &lspError) { - Logger::error("JSON parsing error: {0}", - lspError.message.c_str()); - })); - } - (thisPtr->*handler)(*param); - }; - } - - /// Create an OutgoingNotification object used for the given method. - template - OutgoingNotification outgoingNotification(llvm::StringLiteral method) { - return [&, method](const T ¶ms) { - std::lock_guard transportLock(transportOutputMutex); - Logger::info("--> {0}", method); - transport.notify(method, llvm::json::Value(params)); - }; - } - - /// Create an OutgoingRequest function that, when called, sends a request with - /// the given method via the transport. Should the outgoing request be - /// met with a response, the result JSON is parsed and the response callback - /// is invoked. 
- template - OutgoingRequest - outgoingRequest(llvm::StringLiteral method, - OutgoingRequestCallback callback) { - return [&, method, callback](const Param ¶m, llvm::json::Value id) { - auto callbackWrapper = [method, callback = std::move(callback)]( - llvm::json::Value id, - llvm::Expected value) { - if (!value) - return callback(std::move(id), value.takeError()); - - std::string responseName = llvm::formatv("reply:{0}({1})", method, id); - llvm::Expected result = - parse(*value, responseName, "response"); - if (!result) - return callback(std::move(id), result.takeError()); - - return callback(std::move(id), *result); - }; - - { - std::lock_guard lock(responseHandlersMutex); - responseHandlers.insert( - {debugString(id), std::make_pair(method.str(), callbackWrapper)}); - } - - std::lock_guard transportLock(transportOutputMutex); - Logger::info("--> {0}({1})", method, id); - transport.call(method, llvm::json::Value(param), id); - }; - } - -private: - template - using HandlerMap = llvm::StringMap>; - - HandlerMap notificationHandlers; - HandlerMap)> - methodHandlers; - - /// A pair of (1) the original request's method name, and (2) the callback - /// function to be invoked for responses. - using ResponseHandlerTy = - std::pair>; - /// A mapping from request/response ID to response handler. - llvm::StringMap responseHandlers; - /// Mutex to guard insertion into the response handler map. - std::mutex responseHandlersMutex; - - JSONTransport &transport; - - /// Mutex to guard sending output messages to the transport. 
- std::mutex transportOutputMutex; -}; - -} // namespace lsp -} // namespace mlir - -#endif diff --git a/mlir/include/mlir/Tools/mlir-lsp-server/MlirLspRegistryFunction.h b/mlir/include/mlir/Tools/mlir-lsp-server/MlirLspRegistryFunction.h index 4811ecb5e92b7..0d9ba2a0d1607 100644 --- a/mlir/include/mlir/Tools/mlir-lsp-server/MlirLspRegistryFunction.h +++ b/mlir/include/mlir/Tools/mlir-lsp-server/MlirLspRegistryFunction.h @@ -16,14 +16,16 @@ namespace llvm { template class function_ref; +namespace lsp { +class URIForFile; +} // namespace lsp } // namespace llvm namespace mlir { class DialectRegistry; namespace lsp { -class URIForFile; using DialectRegistryFn = - llvm::function_ref; + llvm::function_ref; } // namespace lsp } // namespace mlir diff --git a/mlir/lib/Tools/lsp-server-support/CMakeLists.txt b/mlir/lib/Tools/lsp-server-support/CMakeLists.txt index 48a96016b792f..2fe29f1b9ec41 100644 --- a/mlir/lib/Tools/lsp-server-support/CMakeLists.txt +++ b/mlir/lib/Tools/lsp-server-support/CMakeLists.txt @@ -1,13 +1,13 @@ add_mlir_library(MLIRLspServerSupportLib CompilationDatabase.cpp - Logging.cpp - Protocol.cpp SourceMgrUtils.cpp - Transport.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Tools/lsp-server-support + LINK_COMPONENTS + SupportLSP + LINK_LIBS PUBLIC MLIRSupport - ) +) diff --git a/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp index 9ae0674383a1d..67b8ef6a256bb 100644 --- a/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp +++ b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp @@ -8,14 +8,15 @@ #include "mlir/Tools/lsp-server-support/CompilationDatabase.h" #include "mlir/Support/FileUtilities.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Protocol.h" #include 
"llvm/Support/YAMLTraits.h" using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::Logger; //===----------------------------------------------------------------------===// // YamlFileInfo diff --git a/mlir/lib/Tools/lsp-server-support/Protocol.cpp b/mlir/lib/Tools/lsp-server-support/Protocol.cpp deleted file mode 100644 index 98287048355c1..0000000000000 --- a/mlir/lib/Tools/lsp-server-support/Protocol.cpp +++ /dev/null @@ -1,1043 +0,0 @@ -//===--- Protocol.cpp - Language Server Protocol Implementation -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the serialization code for the LSP structs. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Tools/lsp-server-support/Protocol.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/JSON.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" - -using namespace mlir; -using namespace mlir::lsp; - -// Helper that doesn't treat `null` and absent fields as failures. -template -static bool mapOptOrNull(const llvm::json::Value ¶ms, - llvm::StringLiteral prop, T &out, - llvm::json::Path path) { - const llvm::json::Object *o = params.getAsObject(); - assert(o); - - // Field is missing or null. 
- auto *v = o->get(prop); - if (!v || v->getAsNull()) - return true; - return fromJSON(*v, out, path.field(prop)); -} - -//===----------------------------------------------------------------------===// -// LSPError -//===----------------------------------------------------------------------===// - -char LSPError::ID; - -//===----------------------------------------------------------------------===// -// URIForFile -//===----------------------------------------------------------------------===// - -static bool isWindowsPath(StringRef path) { - return path.size() > 1 && llvm::isAlpha(path[0]) && path[1] == ':'; -} - -static bool isNetworkPath(StringRef path) { - return path.size() > 2 && path[0] == path[1] && - llvm::sys::path::is_separator(path[0]); -} - -static bool shouldEscapeInURI(unsigned char c) { - // Unreserved characters. - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9')) - return false; - - switch (c) { - case '-': - case '_': - case '.': - case '~': - // '/' is only reserved when parsing. - case '/': - // ':' is only reserved for relative URI paths, which we doesn't produce. - case ':': - return false; - } - return true; -} - -/// Encodes a string according to percent-encoding. -/// - Unreserved characters are not escaped. -/// - Reserved characters always escaped with exceptions like '/'. -/// - All other characters are escaped. -static void percentEncode(StringRef content, std::string &out) { - for (unsigned char c : content) { - if (shouldEscapeInURI(c)) { - out.push_back('%'); - out.push_back(llvm::hexdigit(c / 16)); - out.push_back(llvm::hexdigit(c % 16)); - } else { - out.push_back(c); - } - } -} - -/// Decodes a string according to percent-encoding. 
-static std::string percentDecode(StringRef content) { - std::string result; - for (auto i = content.begin(), e = content.end(); i != e; ++i) { - if (*i != '%') { - result += *i; - continue; - } - if (*i == '%' && i + 2 < content.end() && llvm::isHexDigit(*(i + 1)) && - llvm::isHexDigit(*(i + 2))) { - result.push_back(llvm::hexFromNibbles(*(i + 1), *(i + 2))); - i += 2; - } else { - result.push_back(*i); - } - } - return result; -} - -/// Return the set containing the supported URI schemes. -static StringSet<> &getSupportedSchemes() { - static StringSet<> schemes({"file", "test"}); - return schemes; -} - -/// Returns true if the given scheme is structurally valid, i.e. it does not -/// contain any invalid scheme characters. This does not check that the scheme -/// is actually supported. -static bool isStructurallyValidScheme(StringRef scheme) { - if (scheme.empty()) - return false; - if (!llvm::isAlpha(scheme[0])) - return false; - return llvm::all_of(llvm::drop_begin(scheme), [](char c) { - return llvm::isAlnum(c) || c == '+' || c == '.' || c == '-'; - }); -} - -static llvm::Expected uriFromAbsolutePath(StringRef absolutePath, - StringRef scheme) { - std::string body; - StringRef authority; - StringRef root = llvm::sys::path::root_name(absolutePath); - if (isNetworkPath(root)) { - // Windows UNC paths e.g. \\server\share => file://server/share - authority = root.drop_front(2); - absolutePath.consume_front(root); - } else if (isWindowsPath(root)) { - // Windows paths e.g. X:\path => file:///X:/path - body = "/"; - } - body += llvm::sys::path::convert_to_slash(absolutePath); - - std::string uri = scheme.str() + ":"; - if (authority.empty() && body.empty()) - return uri; - - // If authority if empty, we only print body if it starts with "/"; otherwise, - // the URI is invalid. 
- if (!authority.empty() || StringRef(body).starts_with("/")) { - uri.append("//"); - percentEncode(authority, uri); - } - percentEncode(body, uri); - return uri; -} - -static llvm::Expected getAbsolutePath(StringRef authority, - StringRef body) { - if (!body.starts_with("/")) - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "File scheme: expect body to be an absolute path starting " - "with '/': " + - body); - SmallString<128> path; - if (!authority.empty()) { - // Windows UNC paths e.g. file://server/share => \\server\share - ("//" + authority).toVector(path); - } else if (isWindowsPath(body.substr(1))) { - // Windows paths e.g. file:///X:/path => X:\path - body.consume_front("/"); - } - path.append(body); - llvm::sys::path::native(path); - return std::string(path); -} - -static llvm::Expected parseFilePathFromURI(StringRef origUri) { - StringRef uri = origUri; - - // Decode the scheme of the URI. - size_t pos = uri.find(':'); - if (pos == StringRef::npos) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Scheme must be provided in URI: " + - origUri); - StringRef schemeStr = uri.substr(0, pos); - std::string uriScheme = percentDecode(schemeStr); - if (!isStructurallyValidScheme(uriScheme)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Invalid scheme: " + schemeStr + - " (decoded: " + uriScheme + ")"); - uri = uri.substr(pos + 1); - - // Decode the authority of the URI. - std::string uriAuthority; - if (uri.consume_front("//")) { - pos = uri.find('/'); - uriAuthority = percentDecode(uri.substr(0, pos)); - uri = uri.substr(pos); - } - - // Decode the body of the URI. - std::string uriBody = percentDecode(uri); - - // Compute the absolute path for this uri. 
- if (!getSupportedSchemes().contains(uriScheme)) { - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "unsupported URI scheme `" + uriScheme + - "' for workspace files"); - } - return getAbsolutePath(uriAuthority, uriBody); -} - -llvm::Expected URIForFile::fromURI(StringRef uri) { - llvm::Expected filePath = parseFilePathFromURI(uri); - if (!filePath) - return filePath.takeError(); - return URIForFile(std::move(*filePath), uri.str()); -} - -llvm::Expected URIForFile::fromFile(StringRef absoluteFilepath, - StringRef scheme) { - llvm::Expected uri = - uriFromAbsolutePath(absoluteFilepath, scheme); - if (!uri) - return uri.takeError(); - return fromURI(*uri); -} - -StringRef URIForFile::scheme() const { return uri().split(':').first; } - -void URIForFile::registerSupportedScheme(StringRef scheme) { - getSupportedSchemes().insert(scheme); -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, URIForFile &result, - llvm::json::Path path) { - if (std::optional str = value.getAsString()) { - llvm::Expected expectedURI = URIForFile::fromURI(*str); - if (!expectedURI) { - path.report("unresolvable URI"); - consumeError(expectedURI.takeError()); - return false; - } - result = std::move(*expectedURI); - return true; - } - return false; -} - -llvm::json::Value mlir::lsp::toJSON(const URIForFile &value) { - return value.uri(); -} - -raw_ostream &mlir::lsp::operator<<(raw_ostream &os, const URIForFile &value) { - return os << value.uri(); -} - -//===----------------------------------------------------------------------===// -// ClientCapabilities -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - ClientCapabilities &result, llvm::json::Path path) { - const llvm::json::Object *o = value.getAsObject(); - if (!o) { - path.report("expected object"); - return false; - } - if (const llvm::json::Object *textDocument = o->getObject("textDocument")) { - if (const 
llvm::json::Object *documentSymbol = - textDocument->getObject("documentSymbol")) { - if (std::optional hierarchicalSupport = - documentSymbol->getBoolean("hierarchicalDocumentSymbolSupport")) - result.hierarchicalDocumentSymbol = *hierarchicalSupport; - } - if (auto *codeAction = textDocument->getObject("codeAction")) { - if (codeAction->getObject("codeActionLiteralSupport")) - result.codeActionStructure = true; - } - } - if (auto *window = o->getObject("window")) { - if (std::optional workDoneProgressSupport = - window->getBoolean("workDoneProgress")) - result.workDoneProgress = *workDoneProgressSupport; - } - return true; -} - -//===----------------------------------------------------------------------===// -// ClientInfo -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, ClientInfo &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - if (!o || !o.map("name", result.name)) - return false; - - // Don't fail if we can't parse version. - o.map("version", result.version); - return true; -} - -//===----------------------------------------------------------------------===// -// InitializeParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, TraceLevel &result, - llvm::json::Path path) { - if (std::optional str = value.getAsString()) { - if (*str == "off") { - result = TraceLevel::Off; - return true; - } - if (*str == "messages") { - result = TraceLevel::Messages; - return true; - } - if (*str == "verbose") { - result = TraceLevel::Verbose; - return true; - } - } - return false; -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - InitializeParams &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - if (!o) - return false; - // We deliberately don't fail if we can't parse individual fields. 
- o.map("capabilities", result.capabilities); - o.map("trace", result.trace); - mapOptOrNull(value, "clientInfo", result.clientInfo, path); - - return true; -} - -//===----------------------------------------------------------------------===// -// TextDocumentItem -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - TextDocumentItem &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("uri", result.uri) && - o.map("languageId", result.languageId) && o.map("text", result.text) && - o.map("version", result.version); -} - -//===----------------------------------------------------------------------===// -// TextDocumentIdentifier -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const TextDocumentIdentifier &value) { - return llvm::json::Object{{"uri", value.uri}}; -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - TextDocumentIdentifier &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("uri", result.uri); -} - -//===----------------------------------------------------------------------===// -// VersionedTextDocumentIdentifier -//===----------------------------------------------------------------------===// - -llvm::json::Value -mlir::lsp::toJSON(const VersionedTextDocumentIdentifier &value) { - return llvm::json::Object{ - {"uri", value.uri}, - {"version", value.version}, - }; -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - VersionedTextDocumentIdentifier &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("uri", result.uri) && o.map("version", result.version); -} - -//===----------------------------------------------------------------------===// -// Position -//===----------------------------------------------------------------------===// - 
-bool mlir::lsp::fromJSON(const llvm::json::Value &value, Position &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("line", result.line) && - o.map("character", result.character); -} - -llvm::json::Value mlir::lsp::toJSON(const Position &value) { - return llvm::json::Object{ - {"line", value.line}, - {"character", value.character}, - }; -} - -raw_ostream &mlir::lsp::operator<<(raw_ostream &os, const Position &value) { - return os << value.line << ':' << value.character; -} - -//===----------------------------------------------------------------------===// -// Range -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, Range &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("start", result.start) && o.map("end", result.end); -} - -llvm::json::Value mlir::lsp::toJSON(const Range &value) { - return llvm::json::Object{ - {"start", value.start}, - {"end", value.end}, - }; -} - -raw_ostream &mlir::lsp::operator<<(raw_ostream &os, const Range &value) { - return os << value.start << '-' << value.end; -} - -//===----------------------------------------------------------------------===// -// Location -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, Location &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("uri", result.uri) && o.map("range", result.range); -} - -llvm::json::Value mlir::lsp::toJSON(const Location &value) { - return llvm::json::Object{ - {"uri", value.uri}, - {"range", value.range}, - }; -} - -raw_ostream &mlir::lsp::operator<<(raw_ostream &os, const Location &value) { - return os << value.range << '@' << value.uri; -} - -//===----------------------------------------------------------------------===// -// TextDocumentPositionParams 
-//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - TextDocumentPositionParams &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument) && - o.map("position", result.position); -} - -//===----------------------------------------------------------------------===// -// ReferenceParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - ReferenceContext &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.mapOptional("includeDeclaration", result.includeDeclaration); -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - ReferenceParams &result, llvm::json::Path path) { - TextDocumentPositionParams &base = result; - llvm::json::ObjectMapper o(value, path); - return fromJSON(value, base, path) && o && - o.mapOptional("context", result.context); -} - -//===----------------------------------------------------------------------===// -// DidOpenTextDocumentParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - DidOpenTextDocumentParams &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument); -} - -//===----------------------------------------------------------------------===// -// DidCloseTextDocumentParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - DidCloseTextDocumentParams &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument); -} - -//===----------------------------------------------------------------------===// 
-// DidChangeTextDocumentParams -//===----------------------------------------------------------------------===// - -LogicalResult -TextDocumentContentChangeEvent::applyTo(std::string &contents) const { - // If there is no range, the full document changed. - if (!range) { - contents = text; - return success(); - } - - // Try to map the replacement range to the content. - llvm::SourceMgr tmpScrMgr; - tmpScrMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(contents), - SMLoc()); - SMRange rangeLoc = range->getAsSMRange(tmpScrMgr); - if (!rangeLoc.isValid()) - return failure(); - - contents.replace(rangeLoc.Start.getPointer() - contents.data(), - rangeLoc.End.getPointer() - rangeLoc.Start.getPointer(), - text); - return success(); -} - -LogicalResult TextDocumentContentChangeEvent::applyTo( - ArrayRef changes, std::string &contents) { - for (const auto &change : changes) - if (failed(change.applyTo(contents))) - return failure(); - return success(); -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - TextDocumentContentChangeEvent &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("range", result.range) && - o.map("rangeLength", result.rangeLength) && o.map("text", result.text); -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - DidChangeTextDocumentParams &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument) && - o.map("contentChanges", result.contentChanges); -} - -//===----------------------------------------------------------------------===// -// MarkupContent -//===----------------------------------------------------------------------===// - -static llvm::StringRef toTextKind(MarkupKind kind) { - switch (kind) { - case MarkupKind::PlainText: - return "plaintext"; - case MarkupKind::Markdown: - return "markdown"; - } - llvm_unreachable("Invalid MarkupKind"); -} - -raw_ostream 
&mlir::lsp::operator<<(raw_ostream &os, MarkupKind kind) { - return os << toTextKind(kind); -} - -llvm::json::Value mlir::lsp::toJSON(const MarkupContent &mc) { - if (mc.value.empty()) - return nullptr; - - return llvm::json::Object{ - {"kind", toTextKind(mc.kind)}, - {"value", mc.value}, - }; -} - -//===----------------------------------------------------------------------===// -// Hover -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const Hover &hover) { - llvm::json::Object result{{"contents", toJSON(hover.contents)}}; - if (hover.range) - result["range"] = toJSON(*hover.range); - return std::move(result); -} - -//===----------------------------------------------------------------------===// -// DocumentSymbol -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const DocumentSymbol &symbol) { - llvm::json::Object result{{"name", symbol.name}, - {"kind", static_cast(symbol.kind)}, - {"range", symbol.range}, - {"selectionRange", symbol.selectionRange}}; - - if (!symbol.detail.empty()) - result["detail"] = symbol.detail; - if (!symbol.children.empty()) - result["children"] = symbol.children; - return std::move(result); -} - -//===----------------------------------------------------------------------===// -// DocumentSymbolParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - DocumentSymbolParams &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument); -} - -//===----------------------------------------------------------------------===// -// DiagnosticRelatedInformation -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - DiagnosticRelatedInformation &result, - 
llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("location", result.location) && - o.map("message", result.message); -} - -llvm::json::Value mlir::lsp::toJSON(const DiagnosticRelatedInformation &info) { - return llvm::json::Object{ - {"location", info.location}, - {"message", info.message}, - }; -} - -//===----------------------------------------------------------------------===// -// Diagnostic -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(DiagnosticTag tag) { - return static_cast(tag); -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, DiagnosticTag &result, - llvm::json::Path path) { - if (std::optional i = value.getAsInteger()) { - result = (DiagnosticTag)*i; - return true; - } - - return false; -} - -llvm::json::Value mlir::lsp::toJSON(const Diagnostic &diag) { - llvm::json::Object result{ - {"range", diag.range}, - {"severity", (int)diag.severity}, - {"message", diag.message}, - }; - if (diag.category) - result["category"] = *diag.category; - if (!diag.source.empty()) - result["source"] = diag.source; - if (diag.relatedInformation) - result["relatedInformation"] = *diag.relatedInformation; - if (!diag.tags.empty()) - result["tags"] = diag.tags; - return std::move(result); -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, Diagnostic &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - if (!o) - return false; - int severity = 0; - if (!mapOptOrNull(value, "severity", severity, path)) - return false; - result.severity = (DiagnosticSeverity)severity; - - return o.map("range", result.range) && o.map("message", result.message) && - mapOptOrNull(value, "category", result.category, path) && - mapOptOrNull(value, "source", result.source, path) && - mapOptOrNull(value, "relatedInformation", result.relatedInformation, - path) && - mapOptOrNull(value, "tags", result.tags, path); -} - 
-//===----------------------------------------------------------------------===// -// PublishDiagnosticsParams -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const PublishDiagnosticsParams ¶ms) { - return llvm::json::Object{ - {"uri", params.uri}, - {"diagnostics", params.diagnostics}, - {"version", params.version}, - }; -} - -//===----------------------------------------------------------------------===// -// TextEdit -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, TextEdit &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("range", result.range) && o.map("newText", result.newText); -} - -llvm::json::Value mlir::lsp::toJSON(const TextEdit &value) { - return llvm::json::Object{ - {"range", value.range}, - {"newText", value.newText}, - }; -} - -raw_ostream &mlir::lsp::operator<<(raw_ostream &os, const TextEdit &value) { - os << value.range << " => \""; - llvm::printEscapedString(value.newText, os); - return os << '"'; -} - -//===----------------------------------------------------------------------===// -// CompletionItemKind -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - CompletionItemKind &result, llvm::json::Path path) { - if (std::optional intValue = value.getAsInteger()) { - if (*intValue < static_cast(CompletionItemKind::Text) || - *intValue > static_cast(CompletionItemKind::TypeParameter)) - return false; - result = static_cast(*intValue); - return true; - } - return false; -} - -CompletionItemKind mlir::lsp::adjustKindToCapability( - CompletionItemKind kind, - CompletionItemKindBitset &supportedCompletionItemKinds) { - size_t kindVal = static_cast(kind); - if (kindVal >= kCompletionItemKindMin && - kindVal <= supportedCompletionItemKinds.size() && - 
supportedCompletionItemKinds[kindVal]) - return kind; - - // Provide some fall backs for common kinds that are close enough. - switch (kind) { - case CompletionItemKind::Folder: - return CompletionItemKind::File; - case CompletionItemKind::EnumMember: - return CompletionItemKind::Enum; - case CompletionItemKind::Struct: - return CompletionItemKind::Class; - default: - return CompletionItemKind::Text; - } -} - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - CompletionItemKindBitset &result, - llvm::json::Path path) { - if (const llvm::json::Array *arrayValue = value.getAsArray()) { - for (size_t i = 0, e = arrayValue->size(); i < e; ++i) { - CompletionItemKind kindOut; - if (fromJSON((*arrayValue)[i], kindOut, path.index(i))) - result.set(size_t(kindOut)); - } - return true; - } - return false; -} - -//===----------------------------------------------------------------------===// -// CompletionItem -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const CompletionItem &value) { - assert(!value.label.empty() && "completion item label is required"); - llvm::json::Object result{{"label", value.label}}; - if (value.kind != CompletionItemKind::Missing) - result["kind"] = static_cast(value.kind); - if (!value.detail.empty()) - result["detail"] = value.detail; - if (value.documentation) - result["documentation"] = value.documentation; - if (!value.sortText.empty()) - result["sortText"] = value.sortText; - if (!value.filterText.empty()) - result["filterText"] = value.filterText; - if (!value.insertText.empty()) - result["insertText"] = value.insertText; - if (value.insertTextFormat != InsertTextFormat::Missing) - result["insertTextFormat"] = static_cast(value.insertTextFormat); - if (value.textEdit) - result["textEdit"] = *value.textEdit; - if (!value.additionalTextEdits.empty()) { - result["additionalTextEdits"] = - llvm::json::Array(value.additionalTextEdits); - } - if (value.deprecated) - 
result["deprecated"] = value.deprecated; - return std::move(result); -} - -raw_ostream &mlir::lsp::operator<<(raw_ostream &os, - const CompletionItem &value) { - return os << value.label << " - " << toJSON(value); -} - -bool mlir::lsp::operator<(const CompletionItem &lhs, - const CompletionItem &rhs) { - return (lhs.sortText.empty() ? lhs.label : lhs.sortText) < - (rhs.sortText.empty() ? rhs.label : rhs.sortText); -} - -//===----------------------------------------------------------------------===// -// CompletionList -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const CompletionList &value) { - return llvm::json::Object{ - {"isIncomplete", value.isIncomplete}, - {"items", llvm::json::Array(value.items)}, - }; -} - -//===----------------------------------------------------------------------===// -// CompletionContext -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - CompletionContext &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - int triggerKind; - if (!o || !o.map("triggerKind", triggerKind) || - !mapOptOrNull(value, "triggerCharacter", result.triggerCharacter, path)) - return false; - result.triggerKind = static_cast(triggerKind); - return true; -} - -//===----------------------------------------------------------------------===// -// CompletionParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - CompletionParams &result, llvm::json::Path path) { - if (!fromJSON(value, static_cast(result), path)) - return false; - if (const llvm::json::Value *context = value.getAsObject()->get("context")) - return fromJSON(*context, result.context, path.field("context")); - return true; -} - -//===----------------------------------------------------------------------===// -// 
ParameterInformation -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const ParameterInformation &value) { - assert((value.labelOffsets || !value.labelString.empty()) && - "parameter information label is required"); - llvm::json::Object result; - if (value.labelOffsets) - result["label"] = llvm::json::Array( - {value.labelOffsets->first, value.labelOffsets->second}); - else - result["label"] = value.labelString; - if (!value.documentation.empty()) - result["documentation"] = value.documentation; - return std::move(result); -} - -//===----------------------------------------------------------------------===// -// SignatureInformation -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const SignatureInformation &value) { - assert(!value.label.empty() && "signature information label is required"); - llvm::json::Object result{ - {"label", value.label}, - {"parameters", llvm::json::Array(value.parameters)}, - }; - if (!value.documentation.empty()) - result["documentation"] = value.documentation; - return std::move(result); -} - -raw_ostream &mlir::lsp::operator<<(raw_ostream &os, - const SignatureInformation &value) { - return os << value.label << " - " << toJSON(value); -} - -//===----------------------------------------------------------------------===// -// SignatureHelp -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const SignatureHelp &value) { - assert(value.activeSignature >= 0 && - "Unexpected negative value for number of active signatures."); - assert(value.activeParameter >= 0 && - "Unexpected negative value for active parameter index"); - return llvm::json::Object{ - {"activeSignature", value.activeSignature}, - {"activeParameter", value.activeParameter}, - {"signatures", llvm::json::Array(value.signatures)}, - }; -} - 
-//===----------------------------------------------------------------------===// -// DocumentLinkParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - DocumentLinkParams &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument); -} - -//===----------------------------------------------------------------------===// -// DocumentLink -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const DocumentLink &value) { - return llvm::json::Object{ - {"range", value.range}, - {"target", value.target}, - }; -} - -//===----------------------------------------------------------------------===// -// InlayHintsParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - InlayHintsParams &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument) && - o.map("range", result.range); -} - -//===----------------------------------------------------------------------===// -// InlayHint -//===----------------------------------------------------------------------===// - -llvm::json::Value mlir::lsp::toJSON(const InlayHint &value) { - return llvm::json::Object{{"position", value.position}, - {"kind", (int)value.kind}, - {"label", value.label}, - {"paddingLeft", value.paddingLeft}, - {"paddingRight", value.paddingRight}}; -} -bool mlir::lsp::operator==(const InlayHint &lhs, const InlayHint &rhs) { - return std::tie(lhs.position, lhs.kind, lhs.label) == - std::tie(rhs.position, rhs.kind, rhs.label); -} -bool mlir::lsp::operator<(const InlayHint &lhs, const InlayHint &rhs) { - return std::tie(lhs.position, lhs.kind, lhs.label) < - std::tie(rhs.position, rhs.kind, rhs.label); -} - 
-llvm::raw_ostream &mlir::lsp::operator<<(llvm::raw_ostream &os, - InlayHintKind value) { - switch (value) { - case InlayHintKind::Parameter: - return os << "parameter"; - case InlayHintKind::Type: - return os << "type"; - } - llvm_unreachable("Unknown InlayHintKind"); -} - -//===----------------------------------------------------------------------===// -// CodeActionContext -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - CodeActionContext &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - if (!o || !o.map("diagnostics", result.diagnostics)) - return false; - o.map("only", result.only); - return true; -} - -//===----------------------------------------------------------------------===// -// CodeActionParams -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, - CodeActionParams &result, llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("textDocument", result.textDocument) && - o.map("range", result.range) && o.map("context", result.context); -} - -//===----------------------------------------------------------------------===// -// WorkspaceEdit -//===----------------------------------------------------------------------===// - -bool mlir::lsp::fromJSON(const llvm::json::Value &value, WorkspaceEdit &result, - llvm::json::Path path) { - llvm::json::ObjectMapper o(value, path); - return o && o.map("changes", result.changes); -} - -llvm::json::Value mlir::lsp::toJSON(const WorkspaceEdit &value) { - llvm::json::Object fileChanges; - for (auto &change : value.changes) - fileChanges[change.first] = llvm::json::Array(change.second); - return llvm::json::Object{{"changes", std::move(fileChanges)}}; -} - -//===----------------------------------------------------------------------===// -// CodeAction 
-//===----------------------------------------------------------------------===// - -const llvm::StringLiteral CodeAction::kQuickFix = "quickfix"; -const llvm::StringLiteral CodeAction::kRefactor = "refactor"; -const llvm::StringLiteral CodeAction::kInfo = "info"; - -llvm::json::Value mlir::lsp::toJSON(const CodeAction &value) { - llvm::json::Object codeAction{{"title", value.title}}; - if (value.kind) - codeAction["kind"] = *value.kind; - if (value.diagnostics) - codeAction["diagnostics"] = llvm::json::Array(*value.diagnostics); - if (value.isPreferred) - codeAction["isPreferred"] = true; - if (value.edit) - codeAction["edit"] = *value.edit; - return std::move(codeAction); -} diff --git a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp index f1a362385f285..5cd1c85d054ab 100644 --- a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp +++ b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp @@ -14,6 +14,10 @@ using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::Hover; +using llvm::lsp::Range; +using llvm::lsp::URIForFile; + //===----------------------------------------------------------------------===// // Utils //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp deleted file mode 100644 index 5a098b2841f4b..0000000000000 --- a/mlir/lib/Tools/lsp-server-support/Transport.cpp +++ /dev/null @@ -1,369 +0,0 @@ -//===--- JSONTransport.cpp - sending and receiving LSP messages over JSON -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Tools/lsp-server-support/Transport.h" -#include "mlir/Support/ToolUtilities.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/Error.h" -#include -#include -#include - -using namespace mlir; -using namespace mlir::lsp; - -//===----------------------------------------------------------------------===// -// Reply -//===----------------------------------------------------------------------===// - -namespace { -/// Function object to reply to an LSP call. -/// Each instance must be called exactly once, otherwise: -/// - if there was no reply, an error reply is sent -/// - if there were multiple replies, only the first is sent -class Reply { -public: - Reply(const llvm::json::Value &id, StringRef method, JSONTransport &transport, - std::mutex &transportOutputMutex); - Reply(Reply &&other); - Reply &operator=(Reply &&) = delete; - Reply(const Reply &) = delete; - Reply &operator=(const Reply &) = delete; - - void operator()(llvm::Expected reply); - -private: - std::string method; - std::atomic replied = {false}; - llvm::json::Value id; - JSONTransport *transport; - std::mutex &transportOutputMutex; -}; -} // namespace - -Reply::Reply(const llvm::json::Value &id, llvm::StringRef method, - JSONTransport &transport, std::mutex &transportOutputMutex) - : method(method), id(id), transport(&transport), - transportOutputMutex(transportOutputMutex) {} - -Reply::Reply(Reply &&other) - : method(other.method), replied(other.replied.load()), - id(std::move(other.id)), transport(other.transport), - transportOutputMutex(other.transportOutputMutex) { - other.transport = nullptr; -} - -void Reply::operator()(llvm::Expected reply) { - if (replied.exchange(true)) { - Logger::error("Replied twice to message 
{0}({1})", method, id); - assert(false && "must reply to each call only once!"); - return; - } - assert(transport && "expected valid transport to reply to"); - - std::lock_guard transportLock(transportOutputMutex); - if (reply) { - Logger::info("--> reply:{0}({1})", method, id); - transport->reply(std::move(id), std::move(reply)); - } else { - llvm::Error error = reply.takeError(); - Logger::info("--> reply:{0}({1}): {2}", method, id, error); - transport->reply(std::move(id), std::move(error)); - } -} - -//===----------------------------------------------------------------------===// -// MessageHandler -//===----------------------------------------------------------------------===// - -bool MessageHandler::onNotify(llvm::StringRef method, llvm::json::Value value) { - Logger::info("--> {0}", method); - - if (method == "exit") - return false; - if (method == "$cancel") { - // TODO: Add support for cancelling requests. - } else { - auto it = notificationHandlers.find(method); - if (it != notificationHandlers.end()) - it->second(std::move(value)); - } - return true; -} - -bool MessageHandler::onCall(llvm::StringRef method, llvm::json::Value params, - llvm::json::Value id) { - Logger::info("--> {0}({1})", method, id); - - Reply reply(id, method, transport, transportOutputMutex); - - auto it = methodHandlers.find(method); - if (it != methodHandlers.end()) { - it->second(std::move(params), std::move(reply)); - } else { - reply(llvm::make_error("method not found: " + method.str(), - ErrorCode::MethodNotFound)); - } - return true; -} - -bool MessageHandler::onReply(llvm::json::Value id, - llvm::Expected result) { - // Find the response handler in the mapping. If it exists, move it out of the - // mapping and erase it. 
- ResponseHandlerTy responseHandler; - { - std::lock_guard responseHandlersLock(responseHandlersMutex); - auto it = responseHandlers.find(debugString(id)); - if (it != responseHandlers.end()) { - responseHandler = std::move(it->second); - responseHandlers.erase(it); - } - } - - // If we found a response handler, invoke it. Otherwise, log an error. - if (responseHandler.second) { - Logger::info("--> reply:{0}({1})", responseHandler.first, id); - responseHandler.second(std::move(id), std::move(result)); - } else { - Logger::error( - "received a reply with ID {0}, but there was no such outgoing request", - id); - if (!result) - llvm::consumeError(result.takeError()); - } - return true; -} - -//===----------------------------------------------------------------------===// -// JSONTransport -//===----------------------------------------------------------------------===// - -/// Encode the given error as a JSON object. -static llvm::json::Object encodeError(llvm::Error error) { - std::string message; - ErrorCode code = ErrorCode::UnknownErrorCode; - auto handlerFn = [&](const LSPError &lspError) -> llvm::Error { - message = lspError.message; - code = lspError.code; - return llvm::Error::success(); - }; - if (llvm::Error unhandled = llvm::handleErrors(std::move(error), handlerFn)) - message = llvm::toString(std::move(unhandled)); - - return llvm::json::Object{ - {"message", std::move(message)}, - {"code", int64_t(code)}, - }; -} - -/// Decode the given JSON object into an error. 
-llvm::Error decodeError(const llvm::json::Object &o) { - StringRef msg = o.getString("message").value_or("Unspecified error"); - if (std::optional code = o.getInteger("code")) - return llvm::make_error(msg.str(), ErrorCode(*code)); - return llvm::make_error(llvm::inconvertibleErrorCode(), - msg.str()); -} - -void JSONTransport::notify(StringRef method, llvm::json::Value params) { - sendMessage(llvm::json::Object{ - {"jsonrpc", "2.0"}, - {"method", method}, - {"params", std::move(params)}, - }); -} -void JSONTransport::call(StringRef method, llvm::json::Value params, - llvm::json::Value id) { - sendMessage(llvm::json::Object{ - {"jsonrpc", "2.0"}, - {"id", std::move(id)}, - {"method", method}, - {"params", std::move(params)}, - }); -} -void JSONTransport::reply(llvm::json::Value id, - llvm::Expected result) { - if (result) { - return sendMessage(llvm::json::Object{ - {"jsonrpc", "2.0"}, - {"id", std::move(id)}, - {"result", std::move(*result)}, - }); - } - - sendMessage(llvm::json::Object{ - {"jsonrpc", "2.0"}, - {"id", std::move(id)}, - {"error", encodeError(result.takeError())}, - }); -} - -llvm::Error JSONTransport::run(MessageHandler &handler) { - std::string json; - while (!in->isEndOfInput()) { - if (in->hasError()) { - return llvm::errorCodeToError( - std::error_code(errno, std::system_category())); - } - - if (succeeded(in->readMessage(json))) { - if (llvm::Expected doc = llvm::json::parse(json)) { - if (!handleMessage(std::move(*doc), handler)) - return llvm::Error::success(); - } else { - Logger::error("JSON parse error: {0}", llvm::toString(doc.takeError())); - } - } - } - return llvm::errorCodeToError(std::make_error_code(std::errc::io_error)); -} - -void JSONTransport::sendMessage(llvm::json::Value msg) { - outputBuffer.clear(); - llvm::raw_svector_ostream os(outputBuffer); - os << llvm::formatv(prettyOutput ? 
"{0:2}\n" : "{0}", msg); - out << "Content-Length: " << outputBuffer.size() << "\r\n\r\n" - << outputBuffer; - out.flush(); - Logger::debug(">>> {0}\n", outputBuffer); -} - -bool JSONTransport::handleMessage(llvm::json::Value msg, - MessageHandler &handler) { - // Message must be an object with "jsonrpc":"2.0". - llvm::json::Object *object = msg.getAsObject(); - if (!object || - object->getString("jsonrpc") != std::optional("2.0")) - return false; - - // `id` may be any JSON value. If absent, this is a notification. - std::optional id; - if (llvm::json::Value *i = object->get("id")) - id = std::move(*i); - std::optional method = object->getString("method"); - - // This is a response. - if (!method) { - if (!id) - return false; - if (auto *err = object->getObject("error")) - return handler.onReply(std::move(*id), decodeError(*err)); - // result should be given, use null if not. - llvm::json::Value result = nullptr; - if (llvm::json::Value *r = object->get("result")) - result = std::move(*r); - return handler.onReply(std::move(*id), std::move(result)); - } - - // Params should be given, use null if not. - llvm::json::Value params = nullptr; - if (llvm::json::Value *p = object->get("params")) - params = std::move(*p); - - if (id) - return handler.onCall(*method, std::move(params), std::move(*id)); - return handler.onNotify(*method, std::move(params)); -} - -/// Tries to read a line up to and including \n. -/// If failing, feof(), ferror(), or shutdownRequested() will be set. -LogicalResult readLine(std::FILE *in, SmallVectorImpl &out) { - // Big enough to hold any reasonable header line. May not fit content lines - // in delimited mode, but performance doesn't matter for that mode. 
- static constexpr int bufSize = 128; - size_t size = 0; - out.clear(); - for (;;) { - out.resize_for_overwrite(size + bufSize); - if (!std::fgets(&out[size], bufSize, in)) - return failure(); - - clearerr(in); - - // If the line contained null bytes, anything after it (including \n) will - // be ignored. Fortunately this is not a legal header or JSON. - size_t read = std::strlen(&out[size]); - if (read > 0 && out[size + read - 1] == '\n') { - out.resize(size + read); - return success(); - } - size += read; - } -} - -// Returns std::nullopt when: -// - ferror(), feof(), or shutdownRequested() are set. -// - Content-Length is missing or empty (protocol error) -LogicalResult -JSONTransportInputOverFile::readStandardMessage(std::string &json) { - // A Language Server Protocol message starts with a set of HTTP headers, - // delimited by \r\n, and terminated by an empty line (\r\n). - unsigned long long contentLength = 0; - llvm::SmallString<128> line; - while (true) { - if (feof(in) || hasError() || failed(readLine(in, line))) - return failure(); - - // Content-Length is a mandatory header, and the only one we handle. - StringRef lineRef = line; - if (lineRef.consume_front("Content-Length: ")) { - llvm::getAsUnsignedInteger(lineRef.trim(), 0, contentLength); - } else if (!lineRef.trim().empty()) { - // It's another header, ignore it. - continue; - } else { - // An empty line indicates the end of headers. Go ahead and read the JSON. - break; - } - } - - // The fuzzer likes crashing us by sending "Content-Length: 9999999999999999" - if (contentLength == 0 || contentLength > 1 << 30) - return failure(); - - json.resize(contentLength); - for (size_t pos = 0, read; pos < contentLength; pos += read) { - read = std::fread(&json[pos], 1, contentLength - pos, in); - if (read == 0) - return failure(); - - // If we're done, the error was transient. If we're not done, either it was - // transient or we'll see it again on retry. 
- clearerr(in); - pos += read; - } - return success(); -} - -/// For lit tests we support a simplified syntax: -/// - messages are delimited by '// -----' on a line by itself -/// - lines starting with // are ignored. -/// This is a testing path, so favor simplicity over performance here. -/// When returning failure: feof(), ferror(), or shutdownRequested() will be -/// set. -LogicalResult -JSONTransportInputOverFile::readDelimitedMessage(std::string &json) { - json.clear(); - llvm::SmallString<128> line; - while (succeeded(readLine(in, line))) { - StringRef lineRef = line.str().trim(); - if (lineRef.starts_with("//")) { - // Found a delimiter for the message. - if (lineRef == kDefaultSplitMarker) - break; - continue; - } - - json += line; - } - - return failure(ferror(in)); -} diff --git a/mlir/lib/Tools/mlir-lsp-server/CMakeLists.txt b/mlir/lib/Tools/mlir-lsp-server/CMakeLists.txt index d04d5156fb3c5..e2acba54e5624 100644 --- a/mlir/lib/Tools/mlir-lsp-server/CMakeLists.txt +++ b/mlir/lib/Tools/mlir-lsp-server/CMakeLists.txt @@ -7,6 +7,9 @@ add_mlir_library(MLIRLspServerLib ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Tools/mlir-lsp-server + LINK_COMPONENTS + SupportLSP + LINK_LIBS PUBLIC MLIRBytecodeWriter MLIRFunctionInterfaces diff --git a/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp b/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp index 9b937db0c6a7a..1bbbcdecb57af 100644 --- a/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp @@ -9,8 +9,8 @@ #include "LSPServer.h" #include "MLIRServer.h" #include "Protocol.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Transport.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Transport.h" #include #define DEBUG_TYPE "mlir-lsp-server" @@ -18,6 +18,33 @@ using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::Callback; +using llvm::lsp::CodeAction; +using llvm::lsp::CodeActionParams; +using 
llvm::lsp::CompletionList; +using llvm::lsp::CompletionParams; +using llvm::lsp::DidChangeTextDocumentParams; +using llvm::lsp::DidCloseTextDocumentParams; +using llvm::lsp::DidOpenTextDocumentParams; +using llvm::lsp::DocumentSymbol; +using llvm::lsp::DocumentSymbolParams; +using llvm::lsp::Hover; +using llvm::lsp::InitializedParams; +using llvm::lsp::InitializeParams; +using llvm::lsp::JSONTransport; +using llvm::lsp::Location; +using llvm::lsp::Logger; +using llvm::lsp::MessageHandler; +using llvm::lsp::MLIRConvertBytecodeParams; +using llvm::lsp::MLIRConvertBytecodeResult; +using llvm::lsp::NoParams; +using llvm::lsp::OutgoingNotification; +using llvm::lsp::PublishDiagnosticsParams; +using llvm::lsp::ReferenceParams; +using llvm::lsp::TextDocumentPositionParams; +using llvm::lsp::TextDocumentSyncKind; +using llvm::lsp::URIForFile; + //===----------------------------------------------------------------------===// // LSPServer //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/mlir-lsp-server/LSPServer.h b/mlir/lib/Tools/mlir-lsp-server/LSPServer.h index 2c50c6b4ac6f5..d652899633255 100644 --- a/mlir/lib/Tools/mlir-lsp-server/LSPServer.h +++ b/mlir/lib/Tools/mlir-lsp-server/LSPServer.h @@ -13,17 +13,19 @@ namespace llvm { struct LogicalResult; +namespace lsp { +class JSONTransport; +} // namespace lsp } // namespace llvm namespace mlir { namespace lsp { -class JSONTransport; class MLIRServer; /// Run the main loop of the LSP server using the given MLIR server and /// transport. 
llvm::LogicalResult runMlirLSPServer(MLIRServer &server, - JSONTransport &transport); + llvm::lsp::JSONTransport &transport); } // namespace lsp } // namespace mlir diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp index 61987525a5ca5..47b4328d0d9ec 100644 --- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp @@ -16,10 +16,10 @@ #include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Parser/Parser.h" #include "mlir/Support/ToolUtilities.h" -#include "mlir/Tools/lsp-server-support/Logging.h" #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Base64.h" +#include "llvm/Support/LSP/Logging.h" #include "llvm/Support/SourceMgr.h" #include @@ -39,9 +39,9 @@ static std::optional getLocationFromLoc(StringRef uriScheme, llvm::Expected sourceURI = lsp::URIForFile::fromFile(loc.getFilename(), uriScheme); if (!sourceURI) { - lsp::Logger::error("Failed to create URI for file `{0}`: {1}", - loc.getFilename(), - llvm::toString(sourceURI.takeError())); + llvm::lsp::Logger::error("Failed to create URI for file `{0}`: {1}", + loc.getFilename(), + llvm::toString(sourceURI.takeError())); return std::nullopt; } @@ -217,22 +217,22 @@ static lsp::Diagnostic getLspDiagnoticFromDiag(llvm::SourceMgr &sourceMgr, // Convert the severity for the diagnostic. 
switch (diag.getSeverity()) { - case DiagnosticSeverity::Note: + case mlir::DiagnosticSeverity::Note: llvm_unreachable("expected notes to be handled separately"); - case DiagnosticSeverity::Warning: - lspDiag.severity = lsp::DiagnosticSeverity::Warning; + case mlir::DiagnosticSeverity::Warning: + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Warning; break; - case DiagnosticSeverity::Error: - lspDiag.severity = lsp::DiagnosticSeverity::Error; + case mlir::DiagnosticSeverity::Error: + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Error; break; - case DiagnosticSeverity::Remark: - lspDiag.severity = lsp::DiagnosticSeverity::Information; + case mlir::DiagnosticSeverity::Remark: + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Information; break; } lspDiag.message = diag.str(); // Attach any notes to the main diagnostic as related information. - std::vector relatedDiags; + std::vector relatedDiags; for (Diagnostic ¬e : diag.getNotes()) { lsp::Location noteLoc; if (std::optional loc = @@ -317,7 +317,7 @@ struct MLIRDocument { void getCodeActionForDiagnostic(const lsp::URIForFile &uri, lsp::Position &pos, StringRef severity, StringRef message, - std::vector &edits); + std::vector &edits); //===--------------------------------------------------------------------===// // Bytecode @@ -355,7 +355,8 @@ MLIRDocument::MLIRDocument(MLIRContext &context, const lsp::URIForFile &uri, // Try to parsed the given IR string. auto memBuffer = llvm::MemoryBuffer::getMemBufferCopy(contents, uri.file()); if (!memBuffer) { - lsp::Logger::error("Failed to create memory buffer for file", uri.file()); + llvm::lsp::Logger::error("Failed to create memory buffer for file", + uri.file()); return; } @@ -695,8 +696,8 @@ void MLIRDocument::findDocumentSymbols( if (SymbolOpInterface symbol = dyn_cast(op)) { symbols.emplace_back(symbol.getName(), isa(op) - ? lsp::SymbolKind::Function - : lsp::SymbolKind::Class, + ? 
llvm::lsp::SymbolKind::Function + : llvm::lsp::SymbolKind::Class, lsp::Range(sourceMgr, def->scopeLoc), lsp::Range(sourceMgr, def->loc)); childSymbols = &symbols.back().children; @@ -704,9 +705,9 @@ void MLIRDocument::findDocumentSymbols( } else if (op->hasTrait()) { // Otherwise, if this is a symbol table push an anonymous document symbol. symbols.emplace_back("<" + op->getName().getStringRef() + ">", - lsp::SymbolKind::Namespace, - lsp::Range(sourceMgr, def->scopeLoc), - lsp::Range(sourceMgr, def->loc)); + llvm::lsp::SymbolKind::Namespace, + llvm::lsp::Range(sourceMgr, def->scopeLoc), + llvm::lsp::Range(sourceMgr, def->loc)); childSymbols = &symbols.back().children; } } @@ -734,9 +735,9 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { /// Signal code completion for a dialect name, with an optional prefix. void completeDialectName(StringRef prefix) final { for (StringRef dialect : ctx->getAvailableDialects()) { - lsp::CompletionItem item(prefix + dialect, - lsp::CompletionItemKind::Module, - /*sortText=*/"3"); + llvm::lsp::CompletionItem item(prefix + dialect, + llvm::lsp::CompletionItemKind::Module, + /*sortText=*/"3"); item.detail = "dialect"; completionList.items.emplace_back(item); } @@ -753,9 +754,9 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { if (&op.getDialect() != dialect) continue; - lsp::CompletionItem item( + llvm::lsp::CompletionItem item( op.getStringRef().drop_front(dialectName.size() + 1), - lsp::CompletionItemKind::Field, + llvm::lsp::CompletionItemKind::Field, /*sortText=*/"1"); item.detail = "operation"; completionList.items.emplace_back(item); @@ -768,7 +769,8 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { // Check if we need to insert the `%` or not. 
bool stripPrefix = getCodeCompleteLoc().getPointer()[-1] == '%'; - lsp::CompletionItem item(name, lsp::CompletionItemKind::Variable); + llvm::lsp::CompletionItem item(name, + llvm::lsp::CompletionItemKind::Variable); if (stripPrefix) item.insertText = name.drop_front(1).str(); item.detail = std::move(typeData); @@ -781,7 +783,7 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { // Check if we need to insert the `^` or not. bool stripPrefix = getCodeCompleteLoc().getPointer()[-1] == '^'; - lsp::CompletionItem item(name, lsp::CompletionItemKind::Field); + llvm::lsp::CompletionItem item(name, llvm::lsp::CompletionItemKind::Field); if (stripPrefix) item.insertText = name.drop_front(1).str(); completionList.items.emplace_back(item); @@ -790,8 +792,9 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { /// Signal a completion for the given expected token. void completeExpectedTokens(ArrayRef tokens, bool optional) final { for (StringRef token : tokens) { - lsp::CompletionItem item(token, lsp::CompletionItemKind::Keyword, - /*sortText=*/"0"); + llvm::lsp::CompletionItem item(token, + llvm::lsp::CompletionItemKind::Keyword, + /*sortText=*/"0"); item.detail = optional ? "optional" : ""; completionList.items.emplace_back(item); } @@ -802,7 +805,7 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { appendSimpleCompletions({"affine_set", "affine_map", "dense", "dense_resource", "false", "loc", "sparse", "true", "unit"}, - lsp::CompletionItemKind::Field, + llvm::lsp::CompletionItemKind::Field, /*sortText=*/"1"); completeDialectName("#"); @@ -820,13 +823,14 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { appendSimpleCompletions({"memref", "tensor", "complex", "tuple", "vector", "bf16", "f16", "f32", "f64", "f80", "f128", "index", "none"}, - lsp::CompletionItemKind::Field, + llvm::lsp::CompletionItemKind::Field, /*sortText=*/"1"); // Handle the builtin integer types. 
for (StringRef type : {"i", "si", "ui"}) { - lsp::CompletionItem item(type + "", lsp::CompletionItemKind::Field, - /*sortText=*/"1"); + llvm::lsp::CompletionItem item(type + "", + llvm::lsp::CompletionItemKind::Field, + /*sortText=*/"1"); item.insertText = type.str(); completionList.items.emplace_back(item); } @@ -846,9 +850,9 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { void completeAliases(const llvm::StringMap &aliases, StringRef prefix = "") { for (const auto &alias : aliases) { - lsp::CompletionItem item(prefix + alias.getKey(), - lsp::CompletionItemKind::Field, - /*sortText=*/"2"); + llvm::lsp::CompletionItem item(prefix + alias.getKey(), + llvm::lsp::CompletionItemKind::Field, + /*sortText=*/"2"); llvm::raw_string_ostream(item.detail) << "alias: " << alias.getValue(); completionList.items.emplace_back(item); } @@ -856,7 +860,7 @@ class LSPCodeCompleteContext : public AsmParserCodeCompleteContext { /// Add a set of simple completions that all have the same kind. void appendSimpleCompletions(ArrayRef completions, - lsp::CompletionItemKind kind, + llvm::lsp::CompletionItemKind kind, StringRef sortText = "") { for (StringRef completion : completions) completionList.items.emplace_back(completion, kind, sortText); @@ -897,7 +901,7 @@ MLIRDocument::getCodeCompletion(const lsp::URIForFile &uri, void MLIRDocument::getCodeActionForDiagnostic( const lsp::URIForFile &uri, lsp::Position &pos, StringRef severity, - StringRef message, std::vector &edits) { + StringRef message, std::vector &edits) { // Ignore diagnostics that print the current operation. These are always // enabled for the language server, but not generally during normal // parsing/verification. @@ -913,7 +917,7 @@ void MLIRDocument::getCodeActionForDiagnostic( // Add a text edit for adding an expected-* diagnostic check for this // diagnostic. 
- lsp::TextEdit edit; + llvm::lsp::TextEdit edit; edit.range = lsp::Range(lsp::Position(pos.line, 0)); // Use the indent of the current line for the expected-* diagnostic. @@ -937,13 +941,14 @@ MLIRDocument::convertToBytecode() { // conceptually be relaxed. if (!llvm::hasSingleElement(parsedIR)) { if (parsedIR.empty()) { - return llvm::make_error( + return llvm::make_error( "expected a single and valid top-level operation, please ensure " "there are no errors", - lsp::ErrorCode::RequestFailed); + llvm::lsp::ErrorCode::RequestFailed); } - return llvm::make_error( - "expected a single top-level operation", lsp::ErrorCode::RequestFailed); + return llvm::make_error( + "expected a single top-level operation", + llvm::lsp::ErrorCode::RequestFailed); } lsp::MLIRConvertBytecodeResult result; @@ -1134,7 +1139,7 @@ void MLIRTextFile::findDocumentSymbols( lsp::Position endPos((i == e - 1) ? totalNumLines - 1 : chunks[i + 1]->lineOffset); lsp::DocumentSymbol symbol("", - lsp::SymbolKind::Namespace, + llvm::lsp::SymbolKind::Namespace, /*range=*/lsp::Range(startPos, endPos), /*selectionRange=*/lsp::Range(startPos)); chunk.document.findDocumentSymbols(symbol.children); @@ -1167,10 +1172,10 @@ lsp::CompletionList MLIRTextFile::getCodeCompletion(const lsp::URIForFile &uri, uri, completePos, context.getDialectRegistry()); // Adjust any completion locations. 
- for (lsp::CompletionItem &item : completionList.items) { + for (llvm::lsp::CompletionItem &item : completionList.items) { if (item.textEdit) chunk.adjustLocForChunkOffset(item.textEdit->range); - for (lsp::TextEdit &edit : item.additionalTextEdits) + for (llvm::lsp::TextEdit &edit : item.additionalTextEdits) chunk.adjustLocForChunkOffset(edit.range); } return completionList; @@ -1194,10 +1199,10 @@ void MLIRTextFile::getCodeActions(const lsp::URIForFile &uri, StringRef severity; switch (diag.severity) { - case lsp::DiagnosticSeverity::Error: + case llvm::lsp::DiagnosticSeverity::Error: severity = "error"; break; - case lsp::DiagnosticSeverity::Warning: + case llvm::lsp::DiagnosticSeverity::Warning: severity = "warning"; break; default: @@ -1205,7 +1210,7 @@ void MLIRTextFile::getCodeActions(const lsp::URIForFile &uri, } // Get edits for the diagnostic. - std::vector edits; + std::vector edits; chunk.document.getCodeActionForDiagnostic(uri, diagPos, severity, diag.message, edits); @@ -1221,7 +1226,7 @@ void MLIRTextFile::getCodeActions(const lsp::URIForFile &uri, } } // Fixup the locations for any edits. - for (lsp::TextEdit &edit : edits) + for (llvm::lsp::TextEdit &edit : edits) chunk.adjustLocForChunkOffset(edit.range); action.edit.emplace(); @@ -1236,9 +1241,9 @@ llvm::Expected MLIRTextFile::convertToBytecode() { // Bail out if there is more than one chunk, bytecode wants a single module. 
if (chunks.size() != 1) { - return llvm::make_error( + return llvm::make_error( "unexpected split file, please remove all `// -----`", - lsp::ErrorCode::RequestFailed); + llvm::lsp::ErrorCode::RequestFailed); } return chunks.front()->document.convertToBytecode(); } @@ -1283,7 +1288,7 @@ lsp::MLIRServer::~MLIRServer() = default; void lsp::MLIRServer::addOrUpdateDocument( const URIForFile &uri, StringRef contents, int64_t version, - std::vector &diagnostics) { + std::vector &diagnostics) { impl->files[uri.file()] = std::make_unique( uri, contents, version, impl->registry_fn, diagnostics); } @@ -1298,17 +1303,17 @@ std::optional lsp::MLIRServer::removeDocument(const URIForFile &uri) { return version; } -void lsp::MLIRServer::getLocationsOf(const URIForFile &uri, - const Position &defPos, - std::vector &locations) { +void lsp::MLIRServer::getLocationsOf( + const URIForFile &uri, const Position &defPos, + std::vector &locations) { auto fileIt = impl->files.find(uri.file()); if (fileIt != impl->files.end()) fileIt->second->getLocationsOf(uri, defPos, locations); } -void lsp::MLIRServer::findReferencesOf(const URIForFile &uri, - const Position &pos, - std::vector &references) { +void lsp::MLIRServer::findReferencesOf( + const URIForFile &uri, const Position &pos, + std::vector &references) { auto fileIt = impl->files.find(uri.file()); if (fileIt != impl->files.end()) fileIt->second->findReferencesOf(uri, pos, references); @@ -1367,17 +1372,17 @@ lsp::MLIRServer::convertFromBytecode(const URIForFile &uri) { // Try to parse the given source file. Block parsedBlock; if (failed(parseSourceFile(uri.file(), &parsedBlock, parserConfig))) { - return llvm::make_error( + return llvm::make_error( "failed to parse bytecode source file: " + errorMsg, - lsp::ErrorCode::RequestFailed); + llvm::lsp::ErrorCode::RequestFailed); } // TODO: We currently expect a single top-level operation, but this could // conceptually be relaxed. 
if (!llvm::hasSingleElement(parsedBlock)) { - return llvm::make_error( + return llvm::make_error( "expected bytecode to contain a single top-level operation", - lsp::ErrorCode::RequestFailed); + llvm::lsp::ErrorCode::RequestFailed); } // Print the module to a buffer. @@ -1401,9 +1406,9 @@ llvm::Expected lsp::MLIRServer::convertToBytecode(const URIForFile &uri) { auto fileIt = impl->files.find(uri.file()); if (fileIt == impl->files.end()) { - return llvm::make_error( + return llvm::make_error( "language server does not contain an entry for this source file", - lsp::ErrorCode::RequestFailed); + llvm::lsp::ErrorCode::RequestFailed); } return fileIt->second->convertToBytecode(); } diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.h b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.h index 85e69e69f6631..31a01fec8bbc9 100644 --- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.h +++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.h @@ -9,6 +9,7 @@ #ifndef LIB_MLIR_TOOLS_MLIRLSPSERVER_SERVER_H_ #define LIB_MLIR_TOOLS_MLIRLSPSERVER_SERVER_H_ +#include "Protocol.h" #include "mlir/Support/LLVM.h" #include "mlir/Tools/mlir-lsp-server/MlirLspRegistryFunction.h" #include "llvm/Support/Error.h" @@ -19,16 +20,17 @@ namespace mlir { class DialectRegistry; namespace lsp { -struct CodeAction; -struct CodeActionContext; -struct CompletionList; -struct Diagnostic; -struct DocumentSymbol; -struct Hover; -struct Location; -struct MLIRConvertBytecodeResult; -struct Position; -struct Range; +using llvm::lsp::CodeAction; +using llvm::lsp::CodeActionContext; +using llvm::lsp::CompletionList; +using llvm::lsp::Diagnostic; +using llvm::lsp::DocumentSymbol; +using llvm::lsp::Hover; +using llvm::lsp::Location; +using llvm::lsp::MLIRConvertBytecodeResult; +using llvm::lsp::Position; +using llvm::lsp::Range; +using llvm::lsp::URIForFile; /// This class implements all of the MLIR related functionality necessary for a /// language server. 
This class allows for keeping the MLIR specific logic diff --git a/mlir/lib/Tools/mlir-lsp-server/MlirLspServerMain.cpp b/mlir/lib/Tools/mlir-lsp-server/MlirLspServerMain.cpp index f1dc32615c6a3..d4589b240e39e 100644 --- a/mlir/lib/Tools/mlir-lsp-server/MlirLspServerMain.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/MlirLspServerMain.cpp @@ -9,14 +9,18 @@ #include "mlir/Tools/mlir-lsp-server/MlirLspServerMain.h" #include "LSPServer.h" #include "MLIRServer.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Transport.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Transport.h" #include "llvm/Support/Program.h" using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::JSONStreamStyle; +using llvm::lsp::JSONTransport; +using llvm::lsp::Logger; + LogicalResult mlir::MlirLspServerMain(int argc, char **argv, DialectRegistryFn registry_fn) { llvm::cl::opt inputStyle{ diff --git a/mlir/lib/Tools/mlir-lsp-server/Protocol.cpp b/mlir/lib/Tools/mlir-lsp-server/Protocol.cpp index a56e9a10f03f1..28aded304d388 100644 --- a/mlir/lib/Tools/mlir-lsp-server/Protocol.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/Protocol.cpp @@ -13,14 +13,11 @@ #include "Protocol.h" #include "llvm/Support/JSON.h" -using namespace mlir; -using namespace mlir::lsp; - //===----------------------------------------------------------------------===// // MLIRConvertBytecodeParams //===----------------------------------------------------------------------===// -bool mlir::lsp::fromJSON(const llvm::json::Value &value, +bool llvm::lsp::fromJSON(const llvm::json::Value &value, MLIRConvertBytecodeParams &result, llvm::json::Path path) { llvm::json::ObjectMapper o(value, path); @@ -31,6 +28,6 @@ bool mlir::lsp::fromJSON(const llvm::json::Value &value, // MLIRConvertBytecodeResult //===----------------------------------------------------------------------===// -llvm::json::Value mlir::lsp::toJSON(const 
MLIRConvertBytecodeResult &value) { +llvm::json::Value llvm::lsp::toJSON(const MLIRConvertBytecodeResult &value) { return llvm::json::Object{{"output", value.output}}; } diff --git a/mlir/lib/Tools/mlir-lsp-server/Protocol.h b/mlir/lib/Tools/mlir-lsp-server/Protocol.h index d910780e1ee92..ed0db4e591d8f 100644 --- a/mlir/lib/Tools/mlir-lsp-server/Protocol.h +++ b/mlir/lib/Tools/mlir-lsp-server/Protocol.h @@ -20,9 +20,9 @@ #ifndef LIB_MLIR_TOOLS_MLIRLSPSERVER_PROTOCOL_H_ #define LIB_MLIR_TOOLS_MLIRLSPSERVER_PROTOCOL_H_ -#include "mlir/Tools/lsp-server-support/Protocol.h" +#include "llvm/Support/LSP/Protocol.h" -namespace mlir { +namespace llvm { namespace lsp { //===----------------------------------------------------------------------===// // MLIRConvertBytecodeParams @@ -54,6 +54,6 @@ struct MLIRConvertBytecodeResult { llvm::json::Value toJSON(const MLIRConvertBytecodeResult &value); } // namespace lsp -} // namespace mlir +} // namespace llvm #endif diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt b/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt index bf25b7e0a64f3..b41603fb67eb0 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt @@ -7,6 +7,9 @@ llvm_add_library(MLIRPdllLspServerLib ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Tools/mlir-pdll-lsp-server + LINK_COMPONENTS + SupportLSP + LINK_LIBS PUBLIC MLIRPDLLCodeGen MLIRPDLLParser diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp index 82542a12a1807..7b23adcc7e2e1 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp @@ -10,8 +10,9 @@ #include "PDLLServer.h" #include "Protocol.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Transport.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Protocol.h" +#include 
"llvm/Support/LSP/Transport.h" #include #define DEBUG_TYPE "pdll-lsp-server" @@ -19,6 +20,30 @@ using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::Callback; +using llvm::lsp::CompletionList; +using llvm::lsp::CompletionParams; +using llvm::lsp::DidChangeTextDocumentParams; +using llvm::lsp::DidCloseTextDocumentParams; +using llvm::lsp::DidOpenTextDocumentParams; +using llvm::lsp::DocumentLinkParams; +using llvm::lsp::DocumentSymbol; +using llvm::lsp::DocumentSymbolParams; +using llvm::lsp::Hover; +using llvm::lsp::InitializedParams; +using llvm::lsp::InitializeParams; +using llvm::lsp::InlayHintsParams; +using llvm::lsp::JSONTransport; +using llvm::lsp::Location; +using llvm::lsp::Logger; +using llvm::lsp::MessageHandler; +using llvm::lsp::NoParams; +using llvm::lsp::OutgoingNotification; +using llvm::lsp::PublishDiagnosticsParams; +using llvm::lsp::ReferenceParams; +using llvm::lsp::TextDocumentPositionParams; +using llvm::lsp::TextDocumentSyncKind; + //===----------------------------------------------------------------------===// // LSPServer //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.h b/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.h index 78c4c31100cbc..42c0a5d7b6d2b 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.h +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.h @@ -13,17 +13,19 @@ namespace llvm { struct LogicalResult; +namespace lsp { +class JSONTransport; +} // namespace lsp } // namespace llvm namespace mlir { namespace lsp { -class JSONTransport; class PDLLServer; /// Run the main loop of the LSP server using the given PDLL server and /// transport. 
llvm::LogicalResult runPdllLSPServer(PDLLServer &server, - JSONTransport &transport); + llvm::lsp::JSONTransport &transport); } // namespace lsp } // namespace mlir diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/MlirPdllLspServerMain.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/MlirPdllLspServerMain.cpp index 287a131ecd17d..5dea130675cdb 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/MlirPdllLspServerMain.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/MlirPdllLspServerMain.cpp @@ -9,14 +9,17 @@ #include "mlir/Tools/mlir-pdll-lsp-server/MlirPdllLspServerMain.h" #include "LSPServer.h" #include "PDLLServer.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Transport.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Transport.h" #include "llvm/Support/Program.h" using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::JSONStreamStyle; +using llvm::lsp::Logger; + LogicalResult mlir::MlirPdllLspServerMain(int argc, char **argv) { llvm::cl::opt inputStyle{ "input-style", @@ -72,7 +75,8 @@ LogicalResult mlir::MlirPdllLspServerMain(int argc, char **argv) { // Configure the transport used for communication. llvm::sys::ChangeStdinToBinary(); - JSONTransport transport(stdin, llvm::outs(), inputStyle, prettyPrint); + llvm::lsp::JSONTransport transport(stdin, llvm::outs(), inputStyle, + prettyPrint); // Configure the servers and start the main language server. 
PDLLServer::Options options(compilationDatabases, extraIncludeDirs); diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp index 84f529ae16401..60b9567ff7804 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp @@ -23,13 +23,13 @@ #include "mlir/Tools/PDLL/Parser/CodeComplete.h" #include "mlir/Tools/PDLL/Parser/Parser.h" #include "mlir/Tools/lsp-server-support/CompilationDatabase.h" -#include "mlir/Tools/lsp-server-support/Logging.h" #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/LSP/Logging.h" #include "llvm/Support/Path.h" #include @@ -38,17 +38,19 @@ using namespace mlir::pdll; /// Returns a language server uri for the given source location. `mainFileURI` /// corresponds to the uri for the main file of the source manager. 
-static lsp::URIForFile getURIFromLoc(llvm::SourceMgr &mgr, SMRange loc, - const lsp::URIForFile &mainFileURI) { +static llvm::lsp::URIForFile +getURIFromLoc(llvm::SourceMgr &mgr, SMRange loc, + const llvm::lsp::URIForFile &mainFileURI) { int bufferId = mgr.FindBufferContainingLoc(loc.Start); if (bufferId == 0 || bufferId == static_cast(mgr.getMainFileID())) return mainFileURI; - llvm::Expected fileForLoc = lsp::URIForFile::fromFile( - mgr.getBufferInfo(bufferId).Buffer->getBufferIdentifier()); + llvm::Expected fileForLoc = + llvm::lsp::URIForFile::fromFile( + mgr.getBufferInfo(bufferId).Buffer->getBufferIdentifier()); if (fileForLoc) return *fileForLoc; - lsp::Logger::error("Failed to create URI for include file: {0}", - llvm::toString(fileForLoc.takeError())); + llvm::lsp::Logger::error("Failed to create URI for include file: {0}", + llvm::toString(fileForLoc.takeError())); return mainFileURI; } @@ -59,16 +61,18 @@ static bool isMainFileLoc(llvm::SourceMgr &mgr, SMRange loc) { } /// Returns a language server location from the given source range. -static lsp::Location getLocationFromLoc(llvm::SourceMgr &mgr, SMRange range, - const lsp::URIForFile &uri) { - return lsp::Location(getURIFromLoc(mgr, range, uri), lsp::Range(mgr, range)); +static llvm::lsp::Location +getLocationFromLoc(llvm::SourceMgr &mgr, SMRange range, + const llvm::lsp::URIForFile &uri) { + return llvm::lsp::Location(getURIFromLoc(mgr, range, uri), + llvm::lsp::Range(mgr, range)); } /// Convert the given MLIR diagnostic to the LSP form. 
-static std::optional +static std::optional getLspDiagnoticFromDiag(llvm::SourceMgr &sourceMgr, const ast::Diagnostic &diag, - const lsp::URIForFile &uri) { - lsp::Diagnostic lspDiag; + const llvm::lsp::URIForFile &uri) { + llvm::lsp::Diagnostic lspDiag; lspDiag.source = "pdll"; // FIXME: Right now all of the diagnostics are treated as parser issues, but @@ -76,7 +80,8 @@ getLspDiagnoticFromDiag(llvm::SourceMgr &sourceMgr, const ast::Diagnostic &diag, lspDiag.category = "Parse Error"; // Try to grab a file location for this diagnostic. - lsp::Location loc = getLocationFromLoc(sourceMgr, diag.getLocation(), uri); + llvm::lsp::Location loc = + getLocationFromLoc(sourceMgr, diag.getLocation(), uri); lspDiag.range = loc.range; // Skip diagnostics that weren't emitted within the main file. @@ -88,19 +93,19 @@ getLspDiagnoticFromDiag(llvm::SourceMgr &sourceMgr, const ast::Diagnostic &diag, case ast::Diagnostic::Severity::DK_Note: llvm_unreachable("expected notes to be handled separately"); case ast::Diagnostic::Severity::DK_Warning: - lspDiag.severity = lsp::DiagnosticSeverity::Warning; + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Warning; break; case ast::Diagnostic::Severity::DK_Error: - lspDiag.severity = lsp::DiagnosticSeverity::Error; + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Error; break; case ast::Diagnostic::Severity::DK_Remark: - lspDiag.severity = lsp::DiagnosticSeverity::Information; + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Information; break; } lspDiag.message = diag.getMessage().str(); // Attach any notes to the main diagnostic as related information. - std::vector relatedDiags; + std::vector relatedDiags; for (const ast::Diagnostic ¬e : diag.getNotes()) { relatedDiags.emplace_back( getLocationFromLoc(sourceMgr, note.getLocation(), uri), @@ -259,9 +264,9 @@ namespace { /// This class represents all of the information pertaining to a specific PDL /// document. 
struct PDLDocument { - PDLDocument(const lsp::URIForFile &uri, StringRef contents, + PDLDocument(const llvm::lsp::URIForFile &uri, StringRef contents, const std::vector &extraDirs, - std::vector &diagnostics); + std::vector &diagnostics); PDLDocument(const PDLDocument &) = delete; PDLDocument &operator=(const PDLDocument &) = delete; @@ -269,76 +274,83 @@ struct PDLDocument { // Definitions and References //===--------------------------------------------------------------------===// - void getLocationsOf(const lsp::URIForFile &uri, const lsp::Position &defPos, - std::vector &locations); - void findReferencesOf(const lsp::URIForFile &uri, const lsp::Position &pos, - std::vector &references); + void getLocationsOf(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &defPos, + std::vector &locations); + void findReferencesOf(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &pos, + std::vector &references); //===--------------------------------------------------------------------===// // Document Links //===--------------------------------------------------------------------===// - void getDocumentLinks(const lsp::URIForFile &uri, - std::vector &links); + void getDocumentLinks(const llvm::lsp::URIForFile &uri, + std::vector &links); //===--------------------------------------------------------------------===// // Hover //===--------------------------------------------------------------------===// - std::optional findHover(const lsp::URIForFile &uri, - const lsp::Position &hoverPos); - std::optional findHover(const ast::Decl *decl, - const SMRange &hoverRange); - lsp::Hover buildHoverForOpName(const ods::Operation *op, - const SMRange &hoverRange); - lsp::Hover buildHoverForVariable(const ast::VariableDecl *varDecl, - const SMRange &hoverRange); - lsp::Hover buildHoverForPattern(const ast::PatternDecl *decl, - const SMRange &hoverRange); - lsp::Hover buildHoverForCoreConstraint(const ast::CoreConstraintDecl *decl, + std::optional + findHover(const 
llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &hoverPos); + std::optional findHover(const ast::Decl *decl, + const SMRange &hoverRange); + llvm::lsp::Hover buildHoverForOpName(const ods::Operation *op, + const SMRange &hoverRange); + llvm::lsp::Hover buildHoverForVariable(const ast::VariableDecl *varDecl, const SMRange &hoverRange); + llvm::lsp::Hover buildHoverForPattern(const ast::PatternDecl *decl, + const SMRange &hoverRange); + llvm::lsp::Hover + buildHoverForCoreConstraint(const ast::CoreConstraintDecl *decl, + const SMRange &hoverRange); template - lsp::Hover buildHoverForUserConstraintOrRewrite(StringRef typeName, - const T *decl, - const SMRange &hoverRange); + llvm::lsp::Hover + buildHoverForUserConstraintOrRewrite(StringRef typeName, const T *decl, + const SMRange &hoverRange); //===--------------------------------------------------------------------===// // Document Symbols //===--------------------------------------------------------------------===// - void findDocumentSymbols(std::vector &symbols); + void findDocumentSymbols(std::vector &symbols); //===--------------------------------------------------------------------===// // Code Completion //===--------------------------------------------------------------------===// - lsp::CompletionList getCodeCompletion(const lsp::URIForFile &uri, - const lsp::Position &completePos); + llvm::lsp::CompletionList + getCodeCompletion(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &completePos); //===--------------------------------------------------------------------===// // Signature Help //===--------------------------------------------------------------------===// - lsp::SignatureHelp getSignatureHelp(const lsp::URIForFile &uri, - const lsp::Position &helpPos); + llvm::lsp::SignatureHelp getSignatureHelp(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &helpPos); //===--------------------------------------------------------------------===// // Inlay Hints 
//===--------------------------------------------------------------------===// - void getInlayHints(const lsp::URIForFile &uri, const lsp::Range &range, - std::vector &inlayHints); + void getInlayHints(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Range &range, + std::vector &inlayHints); void getInlayHintsFor(const ast::VariableDecl *decl, - const lsp::URIForFile &uri, - std::vector &inlayHints); - void getInlayHintsFor(const ast::CallExpr *expr, const lsp::URIForFile &uri, - std::vector &inlayHints); + const llvm::lsp::URIForFile &uri, + std::vector &inlayHints); + void getInlayHintsFor(const ast::CallExpr *expr, + const llvm::lsp::URIForFile &uri, + std::vector &inlayHints); void getInlayHintsFor(const ast::OperationExpr *expr, - const lsp::URIForFile &uri, - std::vector &inlayHints); + const llvm::lsp::URIForFile &uri, + std::vector &inlayHints); /// Add a parameter hint for the given expression using `label`. - void addParameterHintFor(std::vector &inlayHints, + void addParameterHintFor(std::vector &inlayHints, const ast::Expr *expr, StringRef label); //===--------------------------------------------------------------------===// @@ -372,13 +384,14 @@ struct PDLDocument { }; } // namespace -PDLDocument::PDLDocument(const lsp::URIForFile &uri, StringRef contents, +PDLDocument::PDLDocument(const llvm::lsp::URIForFile &uri, StringRef contents, const std::vector &extraDirs, - std::vector &diagnostics) + std::vector &diagnostics) : astContext(odsContext) { auto memBuffer = llvm::MemoryBuffer::getMemBufferCopy(contents, uri.file()); if (!memBuffer) { - lsp::Logger::error("Failed to create memory buffer for file", uri.file()); + llvm::lsp::Logger::error("Failed to create memory buffer for file", + uri.file()); return; } @@ -412,9 +425,9 @@ PDLDocument::PDLDocument(const lsp::URIForFile &uri, StringRef contents, // PDLDocument: Definitions and References //===----------------------------------------------------------------------===// -void 
PDLDocument::getLocationsOf(const lsp::URIForFile &uri, - const lsp::Position &defPos, - std::vector &locations) { +void PDLDocument::getLocationsOf(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &defPos, + std::vector &locations) { SMLoc posLoc = defPos.getAsSMLoc(sourceMgr); const PDLIndexSymbol *symbol = index.lookup(posLoc); if (!symbol) @@ -423,9 +436,9 @@ void PDLDocument::getLocationsOf(const lsp::URIForFile &uri, locations.push_back(getLocationFromLoc(sourceMgr, symbol->getDefLoc(), uri)); } -void PDLDocument::findReferencesOf(const lsp::URIForFile &uri, - const lsp::Position &pos, - std::vector &references) { +void PDLDocument::findReferencesOf( + const llvm::lsp::URIForFile &uri, const llvm::lsp::Position &pos, + std::vector &references) { SMLoc posLoc = pos.getAsSMLoc(sourceMgr); const PDLIndexSymbol *symbol = index.lookup(posLoc); if (!symbol) @@ -440,8 +453,9 @@ void PDLDocument::findReferencesOf(const lsp::URIForFile &uri, // PDLDocument: Document Links //===--------------------------------------------------------------------===// -void PDLDocument::getDocumentLinks(const lsp::URIForFile &uri, - std::vector &links) { +void PDLDocument::getDocumentLinks( + const llvm::lsp::URIForFile &uri, + std::vector &links) { for (const lsp::SourceMgrInclude &include : parsedIncludes) links.emplace_back(include.range, include.uri); } @@ -450,9 +464,9 @@ void PDLDocument::getDocumentLinks(const lsp::URIForFile &uri, // PDLDocument: Hover //===----------------------------------------------------------------------===// -std::optional -PDLDocument::findHover(const lsp::URIForFile &uri, - const lsp::Position &hoverPos) { +std::optional +PDLDocument::findHover(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &hoverPos) { SMLoc posLoc = hoverPos.getAsSMLoc(sourceMgr); // Check for a reference to an include. 
@@ -474,8 +488,8 @@ PDLDocument::findHover(const lsp::URIForFile &uri, return findHover(decl, hoverRange); } -std::optional PDLDocument::findHover(const ast::Decl *decl, - const SMRange &hoverRange) { +std::optional +PDLDocument::findHover(const ast::Decl *decl, const SMRange &hoverRange) { // Add hover for variables. if (const auto *varDecl = dyn_cast(decl)) return buildHoverForVariable(varDecl, hoverRange); @@ -499,9 +513,9 @@ std::optional PDLDocument::findHover(const ast::Decl *decl, return std::nullopt; } -lsp::Hover PDLDocument::buildHoverForOpName(const ods::Operation *op, - const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); +llvm::lsp::Hover PDLDocument::buildHoverForOpName(const ods::Operation *op, + const SMRange &hoverRange) { + llvm::lsp::Hover hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); hoverOS << "**OpName**: `" << op->getName() << "`\n***\n" @@ -511,9 +525,10 @@ lsp::Hover PDLDocument::buildHoverForOpName(const ods::Operation *op, return hover; } -lsp::Hover PDLDocument::buildHoverForVariable(const ast::VariableDecl *varDecl, - const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); +llvm::lsp::Hover +PDLDocument::buildHoverForVariable(const ast::VariableDecl *varDecl, + const SMRange &hoverRange) { + llvm::lsp::Hover hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); hoverOS << "**Variable**: `" << varDecl->getName().getName() << "`\n***\n" @@ -522,9 +537,9 @@ lsp::Hover PDLDocument::buildHoverForVariable(const ast::VariableDecl *varDecl, return hover; } -lsp::Hover PDLDocument::buildHoverForPattern(const ast::PatternDecl *decl, - const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); +llvm::lsp::Hover PDLDocument::buildHoverForPattern(const ast::PatternDecl *decl, + const SMRange &hoverRange) { + llvm::lsp::Hover 
hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); hoverOS << "**Pattern**"; @@ -545,10 +560,10 @@ lsp::Hover PDLDocument::buildHoverForPattern(const ast::PatternDecl *decl, return hover; } -lsp::Hover +llvm::lsp::Hover PDLDocument::buildHoverForCoreConstraint(const ast::CoreConstraintDecl *decl, const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); + llvm::lsp::Hover hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); hoverOS << "**Constraint**: `"; @@ -573,9 +588,9 @@ PDLDocument::buildHoverForCoreConstraint(const ast::CoreConstraintDecl *decl, } template -lsp::Hover PDLDocument::buildHoverForUserConstraintOrRewrite( +llvm::lsp::Hover PDLDocument::buildHoverForUserConstraintOrRewrite( StringRef typeName, const T *decl, const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); + llvm::lsp::Hover hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); hoverOS << "**" << typeName << "**: `" << decl->getName().getName() @@ -617,7 +632,7 @@ lsp::Hover PDLDocument::buildHoverForUserConstraintOrRewrite( //===----------------------------------------------------------------------===// void PDLDocument::findDocumentSymbols( - std::vector &symbols) { + std::vector &symbols) { if (failed(astModule)) return; @@ -631,25 +646,28 @@ void PDLDocument::findDocumentSymbols( SMRange nameLoc = name ? name->getLoc() : patternDecl->getLoc(); SMRange bodyLoc(nameLoc.Start, patternDecl->getBody()->getLoc().End); - symbols.emplace_back( - name ? name->getName() : "", lsp::SymbolKind::Class, - lsp::Range(sourceMgr, bodyLoc), lsp::Range(sourceMgr, nameLoc)); + symbols.emplace_back(name ? 
name->getName() : "", + llvm::lsp::SymbolKind::Class, + llvm::lsp::Range(sourceMgr, bodyLoc), + llvm::lsp::Range(sourceMgr, nameLoc)); } else if (const auto *cDecl = dyn_cast(decl)) { // TODO: Add source information for the code block body. SMRange nameLoc = cDecl->getName().getLoc(); SMRange bodyLoc = nameLoc; - symbols.emplace_back( - cDecl->getName().getName(), lsp::SymbolKind::Function, - lsp::Range(sourceMgr, bodyLoc), lsp::Range(sourceMgr, nameLoc)); + symbols.emplace_back(cDecl->getName().getName(), + llvm::lsp::SymbolKind::Function, + llvm::lsp::Range(sourceMgr, bodyLoc), + llvm::lsp::Range(sourceMgr, nameLoc)); } else if (const auto *cDecl = dyn_cast(decl)) { // TODO: Add source information for the code block body. SMRange nameLoc = cDecl->getName().getLoc(); SMRange bodyLoc = nameLoc; - symbols.emplace_back( - cDecl->getName().getName(), lsp::SymbolKind::Function, - lsp::Range(sourceMgr, bodyLoc), lsp::Range(sourceMgr, nameLoc)); + symbols.emplace_back(cDecl->getName().getName(), + llvm::lsp::SymbolKind::Function, + llvm::lsp::Range(sourceMgr, bodyLoc), + llvm::lsp::Range(sourceMgr, nameLoc)); } } } @@ -662,7 +680,7 @@ namespace { class LSPCodeCompleteContext : public CodeCompleteContext { public: LSPCodeCompleteContext(SMLoc completeLoc, llvm::SourceMgr &sourceMgr, - lsp::CompletionList &completionList, + llvm::lsp::CompletionList &completionList, ods::Context &odsContext, ArrayRef includeDirs) : CodeCompleteContext(completeLoc), sourceMgr(sourceMgr), @@ -674,13 +692,13 @@ class LSPCodeCompleteContext : public CodeCompleteContext { ArrayRef elementNames = tupleType.getElementNames(); for (unsigned i = 0, e = tupleType.size(); i < e; ++i) { // Push back a completion item that uses the result index. 
- lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = llvm::formatv("{0} (field #{0})", i).str(); item.insertText = Twine(i).str(); item.filterText = item.sortText = item.insertText; - item.kind = lsp::CompletionItemKind::Field; + item.kind = llvm::lsp::CompletionItemKind::Field; item.detail = llvm::formatv("{0}: {1}", i, elementTypes[i]); - item.insertTextFormat = lsp::InsertTextFormat::PlainText; + item.insertTextFormat = llvm::lsp::InsertTextFormat::PlainText; completionList.items.emplace_back(item); // If the element has a name, push back a completion item with that name. @@ -705,11 +723,11 @@ class LSPCodeCompleteContext : public CodeCompleteContext { const ods::TypeConstraint &constraint = result.getConstraint(); // Push back a completion item that uses the result index. - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = llvm::formatv("{0} (field #{0})", it.index()).str(); item.insertText = Twine(it.index()).str(); item.filterText = item.sortText = item.insertText; - item.kind = lsp::CompletionItemKind::Field; + item.kind = llvm::lsp::CompletionItemKind::Field; switch (result.getVariableLengthKind()) { case ods::VariableLengthKind::Single: item.detail = llvm::formatv("{0}: Value", it.index()).str(); @@ -721,12 +739,12 @@ class LSPCodeCompleteContext : public CodeCompleteContext { item.detail = llvm::formatv("{0}: ValueRange", it.index()).str(); break; } - item.documentation = lsp::MarkupContent{ - lsp::MarkupKind::Markdown, + item.documentation = llvm::lsp::MarkupContent{ + llvm::lsp::MarkupKind::Markdown, llvm::formatv("{0}\n\n```c++\n{1}\n```\n", constraint.getSummary(), constraint.getCppClass()) .str()}; - item.insertTextFormat = lsp::InsertTextFormat::PlainText; + item.insertTextFormat = llvm::lsp::InsertTextFormat::PlainText; completionList.items.emplace_back(item); // If the result has a name, push back a completion item with the result @@ -750,16 +768,16 @@ class LSPCodeCompleteContext : public 
CodeCompleteContext { for (const ods::Attribute &attr : odsOp->getAttributes()) { const ods::AttributeConstraint &constraint = attr.getConstraint(); - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = attr.getName().str(); - item.kind = lsp::CompletionItemKind::Field; + item.kind = llvm::lsp::CompletionItemKind::Field; item.detail = attr.isOptional() ? "optional" : ""; - item.documentation = lsp::MarkupContent{ - lsp::MarkupKind::Markdown, + item.documentation = llvm::lsp::MarkupContent{ + llvm::lsp::MarkupKind::Markdown, llvm::formatv("{0}\n\n```c++\n{1}\n```\n", constraint.getSummary(), constraint.getCppClass()) .str()}; - item.insertTextFormat = lsp::InsertTextFormat::PlainText; + item.insertTextFormat = llvm::lsp::InsertTextFormat::PlainText; completionList.items.emplace_back(item); } } @@ -769,18 +787,18 @@ class LSPCodeCompleteContext : public CodeCompleteContext { const ast::DeclScope *scope) final { auto addCoreConstraint = [&](StringRef constraint, StringRef mlirType, StringRef snippetText = "") { - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = constraint.str(); - item.kind = lsp::CompletionItemKind::Class; + item.kind = llvm::lsp::CompletionItemKind::Class; item.detail = (constraint + " constraint").str(); - item.documentation = lsp::MarkupContent{ - lsp::MarkupKind::Markdown, + item.documentation = llvm::lsp::MarkupContent{ + llvm::lsp::MarkupKind::Markdown, ("A single entity core constraint of type `" + mlirType + "`").str()}; item.sortText = "0"; item.insertText = snippetText.str(); item.insertTextFormat = snippetText.empty() - ? lsp::InsertTextFormat::PlainText - : lsp::InsertTextFormat::Snippet; + ? 
llvm::lsp::InsertTextFormat::PlainText + : llvm::lsp::InsertTextFormat::Snippet; completionList.items.emplace_back(item); }; @@ -812,9 +830,9 @@ class LSPCodeCompleteContext : public CodeCompleteContext { while (scope) { for (const ast::Decl *decl : scope->getDecls()) { if (const auto *cst = dyn_cast(decl)) { - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = cst->getName().getName().str(); - item.kind = lsp::CompletionItemKind::Interface; + item.kind = llvm::lsp::CompletionItemKind::Interface; item.sortText = "2_" + item.label; // Skip constraints that are not single-arg. We currently only @@ -841,8 +859,8 @@ class LSPCodeCompleteContext : public CodeCompleteContext { // Format the documentation for the constraint. if (std::optional doc = getDocumentationFor(sourceMgr, cst)) { - item.documentation = - lsp::MarkupContent{lsp::MarkupKind::Markdown, std::move(*doc)}; + item.documentation = llvm::lsp::MarkupContent{ + llvm::lsp::MarkupKind::Markdown, std::move(*doc)}; } completionList.items.emplace_back(item); @@ -856,10 +874,10 @@ class LSPCodeCompleteContext : public CodeCompleteContext { void codeCompleteDialectName() final { // Code complete known dialects. 
for (const ods::Dialect &dialect : odsContext.getDialects()) { - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = dialect.getName().str(); - item.kind = lsp::CompletionItemKind::Class; - item.insertTextFormat = lsp::InsertTextFormat::PlainText; + item.kind = llvm::lsp::CompletionItemKind::Class; + item.insertTextFormat = llvm::lsp::InsertTextFormat::PlainText; completionList.items.emplace_back(item); } } @@ -872,10 +890,10 @@ class LSPCodeCompleteContext : public CodeCompleteContext { for (const auto &it : dialect->getOperations()) { const ods::Operation &op = *it.second; - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = op.getName().drop_front(dialectName.size() + 1).str(); - item.kind = lsp::CompletionItemKind::Field; - item.insertTextFormat = lsp::InsertTextFormat::PlainText; + item.kind = llvm::lsp::CompletionItemKind::Field; + item.insertTextFormat = llvm::lsp::InsertTextFormat::PlainText; completionList.items.emplace_back(item); } } @@ -883,16 +901,16 @@ class LSPCodeCompleteContext : public CodeCompleteContext { void codeCompletePatternMetadata() final { auto addSimpleConstraint = [&](StringRef constraint, StringRef desc, StringRef snippetText = "") { - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = constraint.str(); - item.kind = lsp::CompletionItemKind::Class; + item.kind = llvm::lsp::CompletionItemKind::Class; item.detail = "pattern metadata"; item.documentation = - lsp::MarkupContent{lsp::MarkupKind::Markdown, desc.str()}; + llvm::lsp::MarkupContent{llvm::lsp::MarkupKind::Markdown, desc.str()}; item.insertText = snippetText.str(); item.insertTextFormat = snippetText.empty() - ? lsp::InsertTextFormat::PlainText - : lsp::InsertTextFormat::Snippet; + ? 
llvm::lsp::InsertTextFormat::PlainText + : llvm::lsp::InsertTextFormat::Snippet; completionList.items.emplace_back(item); }; @@ -913,10 +931,10 @@ class LSPCodeCompleteContext : public CodeCompleteContext { // Functor used to add a single include completion item. auto addIncludeCompletion = [&](StringRef path, bool isDirectory) { - lsp::CompletionItem item; + llvm::lsp::CompletionItem item; item.label = path.str(); - item.kind = isDirectory ? lsp::CompletionItemKind::Folder - : lsp::CompletionItemKind::File; + item.kind = isDirectory ? llvm::lsp::CompletionItemKind::Folder + : llvm::lsp::CompletionItemKind::File; if (seenResults.insert(item.label).second) completionList.items.emplace_back(item); }; @@ -961,31 +979,31 @@ class LSPCodeCompleteContext : public CodeCompleteContext { // Sort the completion results to make sure the output is deterministic in // the face of different iteration schemes for different platforms. - llvm::sort(completionList.items, [](const lsp::CompletionItem &lhs, - const lsp::CompletionItem &rhs) { + llvm::sort(completionList.items, [](const llvm::lsp::CompletionItem &lhs, + const llvm::lsp::CompletionItem &rhs) { return lhs.label < rhs.label; }); } private: llvm::SourceMgr &sourceMgr; - lsp::CompletionList &completionList; + llvm::lsp::CompletionList &completionList; ods::Context &odsContext; ArrayRef includeDirs; }; } // namespace -lsp::CompletionList -PDLDocument::getCodeCompletion(const lsp::URIForFile &uri, - const lsp::Position &completePos) { +llvm::lsp::CompletionList +PDLDocument::getCodeCompletion(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &completePos) { SMLoc posLoc = completePos.getAsSMLoc(sourceMgr); if (!posLoc.isValid()) - return lsp::CompletionList(); + return llvm::lsp::CompletionList(); // To perform code completion, we run another parse of the module with the // code completion context provided. 
ods::Context tmpODSContext; - lsp::CompletionList completionList; + llvm::lsp::CompletionList completionList; LSPCodeCompleteContext lspCompleteContext(posLoc, sourceMgr, completionList, tmpODSContext, sourceMgr.getIncludeDirs()); @@ -1005,7 +1023,7 @@ namespace { class LSPSignatureHelpContext : public CodeCompleteContext { public: LSPSignatureHelpContext(SMLoc completeLoc, llvm::SourceMgr &sourceMgr, - lsp::SignatureHelp &signatureHelp, + llvm::lsp::SignatureHelp &signatureHelp, ods::Context &odsContext) : CodeCompleteContext(completeLoc), sourceMgr(sourceMgr), signatureHelp(signatureHelp), odsContext(odsContext) {} @@ -1014,7 +1032,7 @@ class LSPSignatureHelpContext : public CodeCompleteContext { unsigned currentNumArgs) final { signatureHelp.activeParameter = currentNumArgs; - lsp::SignatureInformation signatureInfo; + llvm::lsp::SignatureInformation signatureInfo; { llvm::raw_string_ostream strOS(signatureInfo.label); strOS << callable->getName()->getName() << "("; @@ -1022,7 +1040,7 @@ class LSPSignatureHelpContext : public CodeCompleteContext { unsigned paramStart = strOS.str().size(); strOS << var->getName().getName() << ": " << var->getType(); unsigned paramEnd = strOS.str().size(); - signatureInfo.parameters.emplace_back(lsp::ParameterInformation{ + signatureInfo.parameters.emplace_back(llvm::lsp::ParameterInformation{ StringRef(strOS.str()).slice(paramStart, paramEnd).str(), std::make_pair(paramStart, paramEnd), /*paramDoc*/ std::string()}); }; @@ -1070,7 +1088,7 @@ class LSPSignatureHelpContext : public CodeCompleteContext { // not more than what is defined in ODS, as this will result in an error // anyways. if (odsOp && currentValue < values.size()) { - lsp::SignatureInformation signatureInfo; + llvm::lsp::SignatureInformation signatureInfo; // Build the signature label. 
{ @@ -1099,7 +1117,7 @@ class LSPSignatureHelpContext : public CodeCompleteContext { } unsigned paramEnd = strOS.str().size(); - signatureInfo.parameters.emplace_back(lsp::ParameterInformation{ + signatureInfo.parameters.emplace_back(llvm::lsp::ParameterInformation{ StringRef(strOS.str()).slice(paramStart, paramEnd).str(), std::make_pair(paramStart, paramEnd), paramDoc}); }; @@ -1114,12 +1132,12 @@ class LSPSignatureHelpContext : public CodeCompleteContext { // If there aren't any arguments yet, we also add the generic signature. if (currentValue == 0 && (!odsOp || !values.empty())) { - lsp::SignatureInformation signatureInfo; + llvm::lsp::SignatureInformation signatureInfo; signatureInfo.label = llvm::formatv("(<{0}s>: {1}Range)", label, dataType).str(); signatureInfo.documentation = ("Generic operation " + label + " specification").str(); - signatureInfo.parameters.emplace_back(lsp::ParameterInformation{ + signatureInfo.parameters.emplace_back(llvm::lsp::ParameterInformation{ StringRef(signatureInfo.label).drop_front().drop_back().str(), std::pair(1, signatureInfo.label.size() - 1), ("All of the " + label + "s of the operation.").str()}); @@ -1129,21 +1147,22 @@ class LSPSignatureHelpContext : public CodeCompleteContext { private: llvm::SourceMgr &sourceMgr; - lsp::SignatureHelp &signatureHelp; + llvm::lsp::SignatureHelp &signatureHelp; ods::Context &odsContext; }; } // namespace -lsp::SignatureHelp PDLDocument::getSignatureHelp(const lsp::URIForFile &uri, - const lsp::Position &helpPos) { +llvm::lsp::SignatureHelp +PDLDocument::getSignatureHelp(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &helpPos) { SMLoc posLoc = helpPos.getAsSMLoc(sourceMgr); if (!posLoc.isValid()) - return lsp::SignatureHelp(); + return llvm::lsp::SignatureHelp(); // To perform code completion, we run another parse of the module with the // code completion context provided. 
ods::Context tmpODSContext; - lsp::SignatureHelp signatureHelp; + llvm::lsp::SignatureHelp signatureHelp; LSPSignatureHelpContext completeContext(posLoc, sourceMgr, signatureHelp, tmpODSContext); @@ -1173,9 +1192,9 @@ static bool shouldAddHintFor(const ast::Expr *expr, StringRef name) { return true; } -void PDLDocument::getInlayHints(const lsp::URIForFile &uri, - const lsp::Range &range, - std::vector &inlayHints) { +void PDLDocument::getInlayHints(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Range &range, + std::vector &inlayHints) { if (failed(astModule)) return; SMRange rangeLoc = range.getAsSMRange(sourceMgr); @@ -1198,9 +1217,9 @@ void PDLDocument::getInlayHints(const lsp::URIForFile &uri, }); } -void PDLDocument::getInlayHintsFor(const ast::VariableDecl *decl, - const lsp::URIForFile &uri, - std::vector &inlayHints) { +void PDLDocument::getInlayHintsFor( + const ast::VariableDecl *decl, const llvm::lsp::URIForFile &uri, + std::vector &inlayHints) { // Check to see if the variable has a constraint list, if it does we don't // provide initializer hints. if (!decl->getConstraints().empty()) @@ -1215,8 +1234,8 @@ void PDLDocument::getInlayHintsFor(const ast::VariableDecl *decl, return; } - lsp::InlayHint hint(lsp::InlayHintKind::Type, - lsp::Position(sourceMgr, decl->getLoc().End)); + llvm::lsp::InlayHint hint(llvm::lsp::InlayHintKind::Type, + llvm::lsp::Position(sourceMgr, decl->getLoc().End)); { llvm::raw_string_ostream labelOS(hint.label); labelOS << ": " << decl->getType(); @@ -1225,9 +1244,9 @@ void PDLDocument::getInlayHintsFor(const ast::VariableDecl *decl, inlayHints.emplace_back(std::move(hint)); } -void PDLDocument::getInlayHintsFor(const ast::CallExpr *expr, - const lsp::URIForFile &uri, - std::vector &inlayHints) { +void PDLDocument::getInlayHintsFor( + const ast::CallExpr *expr, const llvm::lsp::URIForFile &uri, + std::vector &inlayHints) { // Try to extract the callable of this call. 
const auto *callableRef = dyn_cast(expr->getCallableExpr()); const auto *callable = @@ -1242,9 +1261,9 @@ void PDLDocument::getInlayHintsFor(const ast::CallExpr *expr, std::get<1>(it)->getName().getName()); } -void PDLDocument::getInlayHintsFor(const ast::OperationExpr *expr, - const lsp::URIForFile &uri, - std::vector &inlayHints) { +void PDLDocument::getInlayHintsFor( + const ast::OperationExpr *expr, const llvm::lsp::URIForFile &uri, + std::vector &inlayHints) { // Check for ODS information. ast::OperationType opType = dyn_cast(expr->getType()); const auto *odsOp = opType ? opType.getODSOperation() : nullptr; @@ -1290,13 +1309,15 @@ void PDLDocument::getInlayHintsFor(const ast::OperationExpr *expr, "results"); } -void PDLDocument::addParameterHintFor(std::vector &inlayHints, - const ast::Expr *expr, StringRef label) { +void PDLDocument::addParameterHintFor( + std::vector &inlayHints, const ast::Expr *expr, + StringRef label) { if (!shouldAddHintFor(expr, label)) return; - lsp::InlayHint hint(lsp::InlayHintKind::Parameter, - lsp::Position(sourceMgr, expr->getLoc().Start)); + llvm::lsp::InlayHint hint( + llvm::lsp::InlayHintKind::Parameter, + llvm::lsp::Position(sourceMgr, expr->getLoc().Start)); hint.label = (label + ":").str(); hint.paddingRight = true; inlayHints.emplace_back(std::move(hint)); @@ -1342,22 +1363,24 @@ void PDLDocument::getPDLLViewOutput(raw_ostream &os, namespace { /// This class represents a single chunk of an PDL text file. struct PDLTextFileChunk { - PDLTextFileChunk(uint64_t lineOffset, const lsp::URIForFile &uri, + PDLTextFileChunk(uint64_t lineOffset, const llvm::lsp::URIForFile &uri, StringRef contents, const std::vector &extraDirs, - std::vector &diagnostics) + std::vector &diagnostics) : lineOffset(lineOffset), document(uri, contents, extraDirs, diagnostics) {} /// Adjust the line number of the given range to anchor at the beginning of /// the file, instead of the beginning of this chunk. 
- void adjustLocForChunkOffset(lsp::Range &range) { + void adjustLocForChunkOffset(llvm::lsp::Range &range) { adjustLocForChunkOffset(range.start); adjustLocForChunkOffset(range.end); } /// Adjust the line number of the given position to anchor at the beginning of /// the file, instead of the beginning of this chunk. - void adjustLocForChunkOffset(lsp::Position &pos) { pos.line += lineOffset; } + void adjustLocForChunkOffset(llvm::lsp::Position &pos) { + pos.line += lineOffset; + } /// The line offset of this chunk from the beginning of the file. uint64_t lineOffset; @@ -1374,38 +1397,41 @@ namespace { /// This class represents a text file containing one or more PDL documents. class PDLTextFile { public: - PDLTextFile(const lsp::URIForFile &uri, StringRef fileContents, + PDLTextFile(const llvm::lsp::URIForFile &uri, StringRef fileContents, int64_t version, const std::vector &extraDirs, - std::vector &diagnostics); + std::vector &diagnostics); /// Return the current version of this text file. int64_t getVersion() const { return version; } /// Update the file to the new version using the provided set of content /// changes. Returns failure if the update was unsuccessful. 
- LogicalResult update(const lsp::URIForFile &uri, int64_t newVersion, - ArrayRef changes, - std::vector &diagnostics); + LogicalResult + update(const llvm::lsp::URIForFile &uri, int64_t newVersion, + ArrayRef changes, + std::vector &diagnostics); //===--------------------------------------------------------------------===// // LSP Queries //===--------------------------------------------------------------------===// - void getLocationsOf(const lsp::URIForFile &uri, lsp::Position defPos, - std::vector &locations); - void findReferencesOf(const lsp::URIForFile &uri, lsp::Position pos, - std::vector &references); - void getDocumentLinks(const lsp::URIForFile &uri, - std::vector &links); - std::optional findHover(const lsp::URIForFile &uri, - lsp::Position hoverPos); - void findDocumentSymbols(std::vector &symbols); - lsp::CompletionList getCodeCompletion(const lsp::URIForFile &uri, - lsp::Position completePos); - lsp::SignatureHelp getSignatureHelp(const lsp::URIForFile &uri, - lsp::Position helpPos); - void getInlayHints(const lsp::URIForFile &uri, lsp::Range range, - std::vector &inlayHints); + void getLocationsOf(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position defPos, + std::vector &locations); + void findReferencesOf(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position pos, + std::vector &references); + void getDocumentLinks(const llvm::lsp::URIForFile &uri, + std::vector &links); + std::optional findHover(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position hoverPos); + void findDocumentSymbols(std::vector &symbols); + llvm::lsp::CompletionList getCodeCompletion(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position completePos); + llvm::lsp::SignatureHelp getSignatureHelp(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position helpPos); + void getInlayHints(const llvm::lsp::URIForFile &uri, llvm::lsp::Range range, + std::vector &inlayHints); lsp::PDLLViewOutputResult getPDLLViewOutput(lsp::PDLLViewOutputKind kind); private: @@ -1413,14 +1439,14 
@@ class PDLTextFile { std::vector>::iterator>; /// Initialize the text file from the given file contents. - void initialize(const lsp::URIForFile &uri, int64_t newVersion, - std::vector &diagnostics); + void initialize(const llvm::lsp::URIForFile &uri, int64_t newVersion, + std::vector &diagnostics); /// Find the PDL document that contains the given position, and update the /// position to be anchored at the start of the found chunk instead of the /// beginning of the file. - ChunkIterator getChunkItFor(lsp::Position &pos); - PDLTextFileChunk &getChunkFor(lsp::Position &pos) { + ChunkIterator getChunkItFor(llvm::lsp::Position &pos); + PDLTextFileChunk &getChunkFor(llvm::lsp::Position &pos) { return *getChunkItFor(pos); } @@ -1442,20 +1468,21 @@ class PDLTextFile { }; } // namespace -PDLTextFile::PDLTextFile(const lsp::URIForFile &uri, StringRef fileContents, - int64_t version, +PDLTextFile::PDLTextFile(const llvm::lsp::URIForFile &uri, + StringRef fileContents, int64_t version, const std::vector &extraDirs, - std::vector &diagnostics) + std::vector &diagnostics) : contents(fileContents.str()), extraIncludeDirs(extraDirs) { initialize(uri, version, diagnostics); } LogicalResult -PDLTextFile::update(const lsp::URIForFile &uri, int64_t newVersion, - ArrayRef changes, - std::vector &diagnostics) { - if (failed(lsp::TextDocumentContentChangeEvent::applyTo(changes, contents))) { - lsp::Logger::error("Failed to update contents of {0}", uri.file()); +PDLTextFile::update(const llvm::lsp::URIForFile &uri, int64_t newVersion, + ArrayRef changes, + std::vector &diagnostics) { + if (failed(llvm::lsp::TextDocumentContentChangeEvent::applyTo(changes, + contents))) { + llvm::lsp::Logger::error("Failed to update contents of {0}", uri.file()); return failure(); } @@ -1464,36 +1491,37 @@ PDLTextFile::update(const lsp::URIForFile &uri, int64_t newVersion, return success(); } -void PDLTextFile::getLocationsOf(const lsp::URIForFile &uri, - lsp::Position defPos, - std::vector 
&locations) { +void PDLTextFile::getLocationsOf(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position defPos, + std::vector &locations) { PDLTextFileChunk &chunk = getChunkFor(defPos); chunk.document.getLocationsOf(uri, defPos, locations); // Adjust any locations within this file for the offset of this chunk. if (chunk.lineOffset == 0) return; - for (lsp::Location &loc : locations) + for (llvm::lsp::Location &loc : locations) if (loc.uri == uri) chunk.adjustLocForChunkOffset(loc.range); } -void PDLTextFile::findReferencesOf(const lsp::URIForFile &uri, - lsp::Position pos, - std::vector &references) { +void PDLTextFile::findReferencesOf( + const llvm::lsp::URIForFile &uri, llvm::lsp::Position pos, + std::vector &references) { PDLTextFileChunk &chunk = getChunkFor(pos); chunk.document.findReferencesOf(uri, pos, references); // Adjust any locations within this file for the offset of this chunk. if (chunk.lineOffset == 0) return; - for (lsp::Location &loc : references) + for (llvm::lsp::Location &loc : references) if (loc.uri == uri) chunk.adjustLocForChunkOffset(loc.range); } -void PDLTextFile::getDocumentLinks(const lsp::URIForFile &uri, - std::vector &links) { +void PDLTextFile::getDocumentLinks( + const llvm::lsp::URIForFile &uri, + std::vector &links) { chunks.front()->document.getDocumentLinks(uri, links); for (const auto &it : llvm::drop_begin(chunks)) { size_t currentNumLinks = links.size(); @@ -1506,10 +1534,12 @@ void PDLTextFile::getDocumentLinks(const lsp::URIForFile &uri, } } -std::optional PDLTextFile::findHover(const lsp::URIForFile &uri, - lsp::Position hoverPos) { +std::optional +PDLTextFile::findHover(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position hoverPos) { PDLTextFileChunk &chunk = getChunkFor(hoverPos); - std::optional hoverInfo = chunk.document.findHover(uri, hoverPos); + std::optional hoverInfo = + chunk.document.findHover(uri, hoverPos); // Adjust any locations within this file for the offset of this chunk. 
if (chunk.lineOffset != 0 && hoverInfo && hoverInfo->range) @@ -1518,7 +1548,7 @@ std::optional PDLTextFile::findHover(const lsp::URIForFile &uri, } void PDLTextFile::findDocumentSymbols( - std::vector &symbols) { + std::vector &symbols) { if (chunks.size() == 1) return chunks.front()->document.findDocumentSymbols(symbols); @@ -1526,27 +1556,27 @@ void PDLTextFile::findDocumentSymbols( // each chunk. for (unsigned i = 0, e = chunks.size(); i < e; ++i) { PDLTextFileChunk &chunk = *chunks[i]; - lsp::Position startPos(chunk.lineOffset); - lsp::Position endPos((i == e - 1) ? totalNumLines - 1 - : chunks[i + 1]->lineOffset); - lsp::DocumentSymbol symbol("", - lsp::SymbolKind::Namespace, - /*range=*/lsp::Range(startPos, endPos), - /*selectionRange=*/lsp::Range(startPos)); + llvm::lsp::Position startPos(chunk.lineOffset); + llvm::lsp::Position endPos((i == e - 1) ? totalNumLines - 1 + : chunks[i + 1]->lineOffset); + llvm::lsp::DocumentSymbol symbol( + "", llvm::lsp::SymbolKind::Namespace, + /*range=*/llvm::lsp::Range(startPos, endPos), + /*selectionRange=*/llvm::lsp::Range(startPos)); chunk.document.findDocumentSymbols(symbol.children); // Fixup the locations of document symbols within this chunk. 
if (i != 0) { - SmallVector symbolsToFix; - for (lsp::DocumentSymbol &childSymbol : symbol.children) + SmallVector symbolsToFix; + for (llvm::lsp::DocumentSymbol &childSymbol : symbol.children) symbolsToFix.push_back(&childSymbol); while (!symbolsToFix.empty()) { - lsp::DocumentSymbol *symbol = symbolsToFix.pop_back_val(); + llvm::lsp::DocumentSymbol *symbol = symbolsToFix.pop_back_val(); chunk.adjustLocForChunkOffset(symbol->range); chunk.adjustLocForChunkOffset(symbol->selectionRange); - for (lsp::DocumentSymbol &childSymbol : symbol->children) + for (llvm::lsp::DocumentSymbol &childSymbol : symbol->children) symbolsToFix.push_back(&childSymbol); } } @@ -1556,34 +1586,37 @@ void PDLTextFile::findDocumentSymbols( } } -lsp::CompletionList PDLTextFile::getCodeCompletion(const lsp::URIForFile &uri, - lsp::Position completePos) { +llvm::lsp::CompletionList +PDLTextFile::getCodeCompletion(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position completePos) { PDLTextFileChunk &chunk = getChunkFor(completePos); - lsp::CompletionList completionList = + llvm::lsp::CompletionList completionList = chunk.document.getCodeCompletion(uri, completePos); // Adjust any completion locations. 
- for (lsp::CompletionItem &item : completionList.items) { + for (llvm::lsp::CompletionItem &item : completionList.items) { if (item.textEdit) chunk.adjustLocForChunkOffset(item.textEdit->range); - for (lsp::TextEdit &edit : item.additionalTextEdits) + for (llvm::lsp::TextEdit &edit : item.additionalTextEdits) chunk.adjustLocForChunkOffset(edit.range); } return completionList; } -lsp::SignatureHelp PDLTextFile::getSignatureHelp(const lsp::URIForFile &uri, - lsp::Position helpPos) { +llvm::lsp::SignatureHelp +PDLTextFile::getSignatureHelp(const llvm::lsp::URIForFile &uri, + llvm::lsp::Position helpPos) { return getChunkFor(helpPos).document.getSignatureHelp(uri, helpPos); } -void PDLTextFile::getInlayHints(const lsp::URIForFile &uri, lsp::Range range, - std::vector &inlayHints) { +void PDLTextFile::getInlayHints(const llvm::lsp::URIForFile &uri, + llvm::lsp::Range range, + std::vector &inlayHints) { auto startIt = getChunkItFor(range.start); auto endIt = getChunkItFor(range.end); // Functor used to get the chunks for a given file, and fixup any locations - auto getHintsForChunk = [&](ChunkIterator chunkIt, lsp::Range range) { + auto getHintsForChunk = [&](ChunkIterator chunkIt, llvm::lsp::Range range) { size_t currentNumHints = inlayHints.size(); chunkIt->document.getInlayHints(uri, range, inlayHints); @@ -1605,15 +1638,16 @@ void PDLTextFile::getInlayHints(const lsp::URIForFile &uri, lsp::Range range, // Otherwise, the range is split between multiple chunks. The first chunk // has the correct range start, but covers the total document. - getHintsForChunk(startIt, lsp::Range(range.start, getNumLines(startIt))); + getHintsForChunk(startIt, + llvm::lsp::Range(range.start, getNumLines(startIt))); // Every chunk in between uses the full document. 
for (++startIt; startIt != endIt; ++startIt) - getHintsForChunk(startIt, lsp::Range(0, getNumLines(startIt))); + getHintsForChunk(startIt, llvm::lsp::Range(0, getNumLines(startIt))); // The range for the last chunk starts at the beginning of the document, up // through the end of the input range. - getHintsForChunk(startIt, lsp::Range(0, range.end)); + getHintsForChunk(startIt, llvm::lsp::Range(0, range.end)); } lsp::PDLLViewOutputResult @@ -1632,8 +1666,9 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) { return result; } -void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion, - std::vector &diagnostics) { +void PDLTextFile::initialize(const llvm::lsp::URIForFile &uri, + int64_t newVersion, + std::vector &diagnostics) { version = newVersion; chunks.clear(); @@ -1653,7 +1688,7 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion, // Adjust locations used in diagnostics to account for the offset from the // beginning of the file. - for (lsp::Diagnostic &diag : + for (llvm::lsp::Diagnostic &diag : llvm::drop_begin(diagnostics, currentNumDiags)) { chunk->adjustLocForChunkOffset(diag.range); @@ -1668,14 +1703,15 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion, totalNumLines = lineOffset; } -PDLTextFile::ChunkIterator PDLTextFile::getChunkItFor(lsp::Position &pos) { +PDLTextFile::ChunkIterator +PDLTextFile::getChunkItFor(llvm::lsp::Position &pos) { if (chunks.size() == 1) return chunks.begin(); // Search for the first chunk with a greater line offset, the previous chunk // is the one that contains `pos`. auto it = llvm::upper_bound( - chunks, pos, [](const lsp::Position &pos, const auto &chunk) { + chunks, pos, [](const llvm::lsp::Position &pos, const auto &chunk) { return static_cast(pos.line) < chunk->lineOffset; }); ChunkIterator chunkIt(it == chunks.end() ? 
(chunks.end() - 1) : --it); @@ -1710,9 +1746,9 @@ lsp::PDLLServer::PDLLServer(const Options &options) : impl(std::make_unique(options)) {} lsp::PDLLServer::~PDLLServer() = default; -void lsp::PDLLServer::addDocument(const URIForFile &uri, StringRef contents, - int64_t version, - std::vector &diagnostics) { +void lsp::PDLLServer::addDocument( + const URIForFile &uri, StringRef contents, int64_t version, + std::vector &diagnostics) { // Build the set of additional include directories. std::vector additionalIncludeDirs = impl->options.extraDirs; const auto &fileInfo = impl->compilationDatabase.getFileInfo(uri.file()); @@ -1724,7 +1760,7 @@ void lsp::PDLLServer::addDocument(const URIForFile &uri, StringRef contents, void lsp::PDLLServer::updateDocument( const URIForFile &uri, ArrayRef changes, - int64_t version, std::vector &diagnostics) { + int64_t version, std::vector &diagnostics) { // Check that we actually have a document for this uri. auto it = impl->files.find(uri.file()); if (it == impl->files.end()) @@ -1746,17 +1782,17 @@ std::optional lsp::PDLLServer::removeDocument(const URIForFile &uri) { return version; } -void lsp::PDLLServer::getLocationsOf(const URIForFile &uri, - const Position &defPos, - std::vector &locations) { +void lsp::PDLLServer::getLocationsOf( + const URIForFile &uri, const Position &defPos, + std::vector &locations) { auto fileIt = impl->files.find(uri.file()); if (fileIt != impl->files.end()) fileIt->second->getLocationsOf(uri, defPos, locations); } -void lsp::PDLLServer::findReferencesOf(const URIForFile &uri, - const Position &pos, - std::vector &references) { +void lsp::PDLLServer::findReferencesOf( + const URIForFile &uri, const Position &pos, + std::vector &references) { auto fileIt = impl->files.find(uri.file()); if (fileIt != impl->files.end()) fileIt->second->findReferencesOf(uri, pos, references); @@ -1769,8 +1805,8 @@ void lsp::PDLLServer::getDocumentLinks( return fileIt->second->getDocumentLinks(uri, documentLinks); } 
-std::optional lsp::PDLLServer::findHover(const URIForFile &uri, - const Position &hoverPos) { +std::optional +lsp::PDLLServer::findHover(const URIForFile &uri, const Position &hoverPos) { auto fileIt = impl->files.find(uri.file()); if (fileIt != impl->files.end()) return fileIt->second->findHover(uri, hoverPos); @@ -1793,8 +1829,9 @@ lsp::PDLLServer::getCodeCompletion(const URIForFile &uri, return CompletionList(); } -lsp::SignatureHelp lsp::PDLLServer::getSignatureHelp(const URIForFile &uri, - const Position &helpPos) { +llvm::lsp::SignatureHelp +lsp::PDLLServer::getSignatureHelp(const URIForFile &uri, + const Position &helpPos) { auto fileIt = impl->files.find(uri.file()); if (fileIt != impl->files.end()) return fileIt->second->getSignatureHelp(uri, helpPos); diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h index 134431fa63bf8..d82014d6b0684 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h @@ -11,6 +11,7 @@ #include "mlir/Support/LLVM.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/LSP/Protocol.h" #include #include #include @@ -18,21 +19,22 @@ namespace mlir { namespace lsp { -struct Diagnostic; +using llvm::lsp::CompletionList; +using llvm::lsp::Diagnostic; +using llvm::lsp::DocumentLink; +using llvm::lsp::DocumentSymbol; +using llvm::lsp::Hover; +using llvm::lsp::InlayHint; +using llvm::lsp::Location; +using llvm::lsp::Position; +using llvm::lsp::Range; +using llvm::lsp::SignatureHelp; +using llvm::lsp::TextDocumentContentChangeEvent; +using llvm::lsp::URIForFile; + class CompilationDatabase; struct PDLLViewOutputResult; enum class PDLLViewOutputKind; -struct CompletionList; -struct DocumentLink; -struct DocumentSymbol; -struct Hover; -struct InlayHint; -struct Location; -struct Position; -struct Range; -struct SignatureHelp; -struct TextDocumentContentChangeEvent; -class URIForFile; /// This class implements all of the 
PDLL related functionality necessary for a /// language server. This class allows for keeping the PDLL specific logic diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp index 0c9896e3ec1b4..ace460536aa1b 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "Protocol.h" +#include "mlir/Support/LLVM.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/JSON.h" diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h index 0706316631851..a2775f8cbadc2 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h @@ -20,10 +20,12 @@ #ifndef LIB_MLIR_TOOLS_MLIRPDLLLSPSERVER_PROTOCOL_H_ #define LIB_MLIR_TOOLS_MLIRPDLLLSPSERVER_PROTOCOL_H_ -#include "mlir/Tools/lsp-server-support/Protocol.h" +#include "llvm/Support/LSP/Protocol.h" namespace mlir { namespace lsp { +using llvm::lsp::URIForFile; + //===----------------------------------------------------------------------===// // PDLLViewOutputParams //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/tblgen-lsp-server/CMakeLists.txt b/mlir/lib/Tools/tblgen-lsp-server/CMakeLists.txt index 80fc1ffe4029a..b21650ed03b6f 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/CMakeLists.txt +++ b/mlir/lib/Tools/tblgen-lsp-server/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Demangle Support TableGen + SupportLSP ) llvm_add_library(TableGenLspServerLib diff --git a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp index bb3c0a77747aa..95a457f3144c5 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp @@ -9,14 +9,33 @@ #include 
"LSPServer.h" #include "TableGenServer.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" -#include "mlir/Tools/lsp-server-support/Transport.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Protocol.h" +#include "llvm/Support/LSP/Transport.h" #include using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::Callback; +using llvm::lsp::DidChangeTextDocumentParams; +using llvm::lsp::DidCloseTextDocumentParams; +using llvm::lsp::DidOpenTextDocumentParams; +using llvm::lsp::DocumentLinkParams; +using llvm::lsp::Hover; +using llvm::lsp::InitializedParams; +using llvm::lsp::InitializeParams; +using llvm::lsp::JSONTransport; +using llvm::lsp::Location; +using llvm::lsp::Logger; +using llvm::lsp::MessageHandler; +using llvm::lsp::NoParams; +using llvm::lsp::OutgoingNotification; +using llvm::lsp::PublishDiagnosticsParams; +using llvm::lsp::ReferenceParams; +using llvm::lsp::TextDocumentPositionParams; +using llvm::lsp::TextDocumentSyncKind; + //===----------------------------------------------------------------------===// // LSPServer //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.h b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.h index 501a9dada8aab..596688b62f8da 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.h +++ b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.h @@ -13,17 +13,19 @@ namespace llvm { struct LogicalResult; +namespace lsp { +class JSONTransport; +} // namespace lsp } // namespace llvm namespace mlir { namespace lsp { -class JSONTransport; class TableGenServer; /// Run the main loop of the LSP server using the given TableGen server and /// transport. 
llvm::LogicalResult runTableGenLSPServer(TableGenServer &server, - JSONTransport &transport); + llvm::lsp::JSONTransport &transport); } // namespace lsp } // namespace mlir diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp index 21af78c9a506c..8014b8d6dba4a 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp @@ -9,14 +9,18 @@ #include "mlir/Tools/tblgen-lsp-server/TableGenLspServerMain.h" #include "LSPServer.h" #include "TableGenServer.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Transport.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Transport.h" #include "llvm/Support/Program.h" using namespace mlir; using namespace mlir::lsp; +using llvm::lsp::JSONStreamStyle; +using llvm::lsp::JSONTransport; +using llvm::lsp::Logger; + LogicalResult mlir::TableGenLspServerMain(int argc, char **argv) { llvm::cl::opt inputStyle{ "input-style", diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp index 5faeeae839f44..3080b78f187b1 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp @@ -10,12 +10,12 @@ #include "mlir/Support/IndentedOstream.h" #include "mlir/Tools/lsp-server-support/CompilationDatabase.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/LSP/Logging.h" +#include "llvm/Support/LSP/Protocol.h" #include "llvm/Support/Path.h" #include "llvm/TableGen/Parser.h" #include "llvm/TableGen/Record.h" @@ -36,45 +36,49 @@ static 
SMRange convertTokenLocToRange(SMLoc loc) { /// Returns a language server uri for the given source location. `mainFileURI` /// corresponds to the uri for the main file of the source manager. -static lsp::URIForFile getURIFromLoc(const SourceMgr &mgr, SMLoc loc, - const lsp::URIForFile &mainFileURI) { +static llvm::lsp::URIForFile +getURIFromLoc(const SourceMgr &mgr, SMLoc loc, + const llvm::lsp::URIForFile &mainFileURI) { int bufferId = mgr.FindBufferContainingLoc(loc); if (bufferId == 0 || bufferId == static_cast(mgr.getMainFileID())) return mainFileURI; - llvm::Expected fileForLoc = lsp::URIForFile::fromFile( - mgr.getBufferInfo(bufferId).Buffer->getBufferIdentifier()); + llvm::Expected fileForLoc = + llvm::lsp::URIForFile::fromFile( + mgr.getBufferInfo(bufferId).Buffer->getBufferIdentifier()); if (fileForLoc) return *fileForLoc; - lsp::Logger::error("Failed to create URI for include file: {0}", - llvm::toString(fileForLoc.takeError())); + llvm::lsp::Logger::error("Failed to create URI for include file: {0}", + llvm::toString(fileForLoc.takeError())); return mainFileURI; } /// Returns a language server location from the given source range. -static lsp::Location getLocationFromLoc(SourceMgr &mgr, SMRange loc, - const lsp::URIForFile &uri) { - return lsp::Location(getURIFromLoc(mgr, loc.Start, uri), - lsp::Range(mgr, loc)); +static llvm::lsp::Location +getLocationFromLoc(SourceMgr &mgr, SMRange loc, + const llvm::lsp::URIForFile &uri) { + return llvm::lsp::Location(getURIFromLoc(mgr, loc.Start, uri), + llvm::lsp::Range(mgr, loc)); } -static lsp::Location getLocationFromLoc(SourceMgr &mgr, SMLoc loc, - const lsp::URIForFile &uri) { +static llvm::lsp::Location +getLocationFromLoc(SourceMgr &mgr, SMLoc loc, + const llvm::lsp::URIForFile &uri) { return getLocationFromLoc(mgr, convertTokenLocToRange(loc), uri); } /// Convert the given TableGen diagnostic to the LSP form. 
-static std::optional +static std::optional getLspDiagnoticFromDiag(const llvm::SMDiagnostic &diag, - const lsp::URIForFile &uri) { + const llvm::lsp::URIForFile &uri) { auto *sourceMgr = const_cast(diag.getSourceMgr()); if (!sourceMgr || !diag.getLoc().isValid()) return std::nullopt; - lsp::Diagnostic lspDiag; + llvm::lsp::Diagnostic lspDiag; lspDiag.source = "tablegen"; lspDiag.category = "Parse Error"; // Try to grab a file location for this diagnostic. - lsp::Location loc = getLocationFromLoc(*sourceMgr, diag.getLoc(), uri); + llvm::lsp::Location loc = getLocationFromLoc(*sourceMgr, diag.getLoc(), uri); lspDiag.range = loc.range; // Skip diagnostics that weren't emitted within the main file. @@ -84,17 +88,17 @@ getLspDiagnoticFromDiag(const llvm::SMDiagnostic &diag, // Convert the severity for the diagnostic. switch (diag.getKind()) { case SourceMgr::DK_Warning: - lspDiag.severity = lsp::DiagnosticSeverity::Warning; + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Warning; break; case SourceMgr::DK_Error: - lspDiag.severity = lsp::DiagnosticSeverity::Error; + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Error; break; case SourceMgr::DK_Note: // Notes are emitted separately from the main diagnostic, so we just treat // them as remarks given that we can't determine the diagnostic to relate // them to. case SourceMgr::DK_Remark: - lspDiag.severity = lsp::DiagnosticSeverity::Information; + lspDiag.severity = llvm::lsp::DiagnosticSeverity::Information; break; } lspDiag.message = diag.getMessage().str(); @@ -322,54 +326,59 @@ namespace { /// This class represents a text file containing one or more TableGen documents. class TableGenTextFile { public: - TableGenTextFile(const lsp::URIForFile &uri, StringRef fileContents, + TableGenTextFile(const llvm::lsp::URIForFile &uri, StringRef fileContents, int64_t version, const std::vector &extraIncludeDirs, - std::vector &diagnostics); + std::vector &diagnostics); /// Return the current version of this text file. 
int64_t getVersion() const { return version; } /// Update the file to the new version using the provided set of content /// changes. Returns failure if the update was unsuccessful. - LogicalResult update(const lsp::URIForFile &uri, int64_t newVersion, - ArrayRef changes, - std::vector &diagnostics); + LogicalResult + update(const llvm::lsp::URIForFile &uri, int64_t newVersion, + ArrayRef changes, + std::vector &diagnostics); //===--------------------------------------------------------------------===// // Definitions and References //===--------------------------------------------------------------------===// - void getLocationsOf(const lsp::URIForFile &uri, const lsp::Position &defPos, - std::vector &locations); - void findReferencesOf(const lsp::URIForFile &uri, const lsp::Position &pos, - std::vector &references); + void getLocationsOf(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &defPos, + std::vector &locations); + void findReferencesOf(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &pos, + std::vector &references); //===--------------------------------------------------------------------===// // Document Links //===--------------------------------------------------------------------===// - void getDocumentLinks(const lsp::URIForFile &uri, - std::vector &links); + void getDocumentLinks(const llvm::lsp::URIForFile &uri, + std::vector &links); //===--------------------------------------------------------------------===// // Hover //===--------------------------------------------------------------------===// - std::optional findHover(const lsp::URIForFile &uri, - const lsp::Position &hoverPos); - lsp::Hover buildHoverForRecord(const Record *record, - const SMRange &hoverRange); - lsp::Hover buildHoverForTemplateArg(const Record *record, + std::optional + findHover(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &hoverPos); + llvm::lsp::Hover buildHoverForRecord(const Record *record, + const SMRange &hoverRange); + 
llvm::lsp::Hover buildHoverForTemplateArg(const Record *record, + const RecordVal *value, + const SMRange &hoverRange); + llvm::lsp::Hover buildHoverForField(const Record *record, const RecordVal *value, const SMRange &hoverRange); - lsp::Hover buildHoverForField(const Record *record, const RecordVal *value, - const SMRange &hoverRange); private: /// Initialize the text file from the given file contents. - void initialize(const lsp::URIForFile &uri, int64_t newVersion, - std::vector &diagnostics); + void initialize(const llvm::lsp::URIForFile &uri, int64_t newVersion, + std::vector &diagnostics); /// The full string contents of the file. std::string contents; @@ -395,9 +404,9 @@ class TableGenTextFile { } // namespace TableGenTextFile::TableGenTextFile( - const lsp::URIForFile &uri, StringRef fileContents, int64_t version, + const llvm::lsp::URIForFile &uri, StringRef fileContents, int64_t version, const std::vector &extraIncludeDirs, - std::vector &diagnostics) + std::vector &diagnostics) : contents(fileContents.str()), version(version) { // Build the set of include directories for this file. 
llvm::SmallString<32> uriDirectory(uri.file()); @@ -409,12 +418,13 @@ TableGenTextFile::TableGenTextFile( initialize(uri, version, diagnostics); } -LogicalResult -TableGenTextFile::update(const lsp::URIForFile &uri, int64_t newVersion, - ArrayRef changes, - std::vector &diagnostics) { - if (failed(lsp::TextDocumentContentChangeEvent::applyTo(changes, contents))) { - lsp::Logger::error("Failed to update contents of {0}", uri.file()); +LogicalResult TableGenTextFile::update( + const llvm::lsp::URIForFile &uri, int64_t newVersion, + ArrayRef changes, + std::vector &diagnostics) { + if (failed(llvm::lsp::TextDocumentContentChangeEvent::applyTo(changes, + contents))) { + llvm::lsp::Logger::error("Failed to update contents of {0}", uri.file()); return failure(); } @@ -423,9 +433,9 @@ TableGenTextFile::update(const lsp::URIForFile &uri, int64_t newVersion, return success(); } -void TableGenTextFile::initialize(const lsp::URIForFile &uri, - int64_t newVersion, - std::vector &diagnostics) { +void TableGenTextFile::initialize( + const llvm::lsp::URIForFile &uri, int64_t newVersion, + std::vector &diagnostics) { version = newVersion; sourceMgr = SourceMgr(); recordKeeper = std::make_unique(); @@ -433,7 +443,8 @@ void TableGenTextFile::initialize(const lsp::URIForFile &uri, // Build a buffer for this file. auto memBuffer = llvm::MemoryBuffer::getMemBuffer(contents, uri.file()); if (!memBuffer) { - lsp::Logger::error("Failed to create memory buffer for file", uri.file()); + llvm::lsp::Logger::error("Failed to create memory buffer for file", + uri.file()); return; } sourceMgr.setIncludeDirs(includeDirs); @@ -442,8 +453,8 @@ void TableGenTextFile::initialize(const lsp::URIForFile &uri, // This class provides a context argument for the SourceMgr diagnostic // handler. 
struct DiagHandlerContext { - std::vector &diagnostics; - const lsp::URIForFile &uri; + std::vector &diagnostics; + const llvm::lsp::URIForFile &uri; } handlerContext{diagnostics, uri}; // Set the diagnostic handler for the tablegen source manager. @@ -469,9 +480,9 @@ void TableGenTextFile::initialize(const lsp::URIForFile &uri, // TableGenTextFile: Definitions and References //===----------------------------------------------------------------------===// -void TableGenTextFile::getLocationsOf(const lsp::URIForFile &uri, - const lsp::Position &defPos, - std::vector &locations) { +void TableGenTextFile::getLocationsOf( + const llvm::lsp::URIForFile &uri, const llvm::lsp::Position &defPos, + std::vector &locations) { SMLoc posLoc = defPos.getAsSMLoc(sourceMgr); const TableGenIndexSymbol *symbol = index.lookup(posLoc); if (!symbol) @@ -492,8 +503,8 @@ void TableGenTextFile::getLocationsOf(const lsp::URIForFile &uri, } void TableGenTextFile::findReferencesOf( - const lsp::URIForFile &uri, const lsp::Position &pos, - std::vector &references) { + const llvm::lsp::URIForFile &uri, const llvm::lsp::Position &pos, + std::vector &references) { SMLoc posLoc = pos.getAsSMLoc(sourceMgr); const TableGenIndexSymbol *symbol = index.lookup(posLoc); if (!symbol) @@ -508,8 +519,9 @@ void TableGenTextFile::findReferencesOf( // TableGenTextFile: Document Links //===--------------------------------------------------------------------===// -void TableGenTextFile::getDocumentLinks(const lsp::URIForFile &uri, - std::vector &links) { +void TableGenTextFile::getDocumentLinks( + const llvm::lsp::URIForFile &uri, + std::vector &links) { for (const lsp::SourceMgrInclude &include : parsedIncludes) links.emplace_back(include.range, include.uri); } @@ -518,9 +530,9 @@ void TableGenTextFile::getDocumentLinks(const lsp::URIForFile &uri, // TableGenTextFile: Hover //===----------------------------------------------------------------------===// -std::optional -TableGenTextFile::findHover(const 
lsp::URIForFile &uri, - const lsp::Position &hoverPos) { +std::optional +TableGenTextFile::findHover(const llvm::lsp::URIForFile &uri, + const llvm::lsp::Position &hoverPos) { // Check for a reference to an include. for (const lsp::SourceMgrInclude &include : parsedIncludes) if (include.range.contains(hoverPos)) @@ -546,9 +558,10 @@ TableGenTextFile::findHover(const lsp::URIForFile &uri, return buildHoverForField(recordVal->record, value, hoverRange); } -lsp::Hover TableGenTextFile::buildHoverForRecord(const Record *record, - const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); +llvm::lsp::Hover +TableGenTextFile::buildHoverForRecord(const Record *record, + const SMRange &hoverRange) { + llvm::lsp::Hover hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); @@ -590,9 +603,9 @@ lsp::Hover TableGenTextFile::buildHoverForRecord(const Record *record, return hover; } -lsp::Hover TableGenTextFile::buildHoverForTemplateArg( +llvm::lsp::Hover TableGenTextFile::buildHoverForTemplateArg( const Record *record, const RecordVal *value, const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); + llvm::lsp::Hover hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); StringRef name = value->getName().rsplit(':').second; @@ -604,10 +617,9 @@ lsp::Hover TableGenTextFile::buildHoverForTemplateArg( return hover; } -lsp::Hover TableGenTextFile::buildHoverForField(const Record *record, - const RecordVal *value, - const SMRange &hoverRange) { - lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); +llvm::lsp::Hover TableGenTextFile::buildHoverForField( + const Record *record, const RecordVal *value, const SMRange &hoverRange) { + llvm::lsp::Hover hover(llvm::lsp::Range(sourceMgr, hoverRange)); { llvm::raw_string_ostream hoverOS(hover.contents.value); hoverOS << "**field** `" << value->getName() << "`\n***\nType: `"; @@ -722,7 
+734,7 @@ void lsp::TableGenServer::getDocumentLinks( return fileIt->second->getDocumentLinks(uri, documentLinks); } -std::optional +std::optional lsp::TableGenServer::findHover(const URIForFile &uri, const Position &hoverPos) { auto fileIt = impl->files.find(uri.file()); diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h index bdc851024a818..e54b8bcf35e24 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h @@ -11,6 +11,7 @@ #include "mlir/Support/LLVM.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/LSP/Protocol.h" #include #include #include @@ -18,13 +19,13 @@ namespace mlir { namespace lsp { -struct Diagnostic; -struct DocumentLink; -struct Hover; -struct Location; -struct Position; -struct TextDocumentContentChangeEvent; -class URIForFile; +using llvm::lsp::Diagnostic; +using llvm::lsp::DocumentLink; +using llvm::lsp::Hover; +using llvm::lsp::Location; +using llvm::lsp::Position; +using llvm::lsp::TextDocumentContentChangeEvent; +using llvm::lsp::URIForFile; /// This class implements all of the TableGen related functionality necessary /// for a language server. This class allows for keeping the TableGen specific diff --git a/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp b/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp index 10d602fdfe728..712237bbbbca6 100644 --- a/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp +++ b/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp @@ -10,8 +10,8 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/InitAllDialects.h" #include "mlir/InitAllExtensions.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" #include "mlir/Tools/mlir-lsp-server/MlirLspServerMain.h" +#include "llvm/Support/LSP/Protocol.h" using namespace mlir; @@ -37,8 +37,8 @@ int main(int argc, char **argv) { // Returns the registry, except in testing mode when the URI contains // "-disable-lsp-registration". 
Testing for/example of registering dialects // based on URI. - auto registryFn = [®istry, - &empty](const lsp::URIForFile &uri) -> DialectRegistry & { + auto registryFn = [®istry, &empty]( + const llvm::lsp::URIForFile &uri) -> DialectRegistry & { (void)empty; #ifdef MLIR_INCLUDE_TESTS if (uri.uri().contains("-disable-lsp-registration")) diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index c5f0d7e384d01..89332bce5fe05 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -18,7 +18,6 @@ add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Target) -add_subdirectory(Tools) add_subdirectory(Transforms) if(MLIR_ENABLE_EXECUTION_ENGINE) diff --git a/mlir/unittests/Tools/CMakeLists.txt b/mlir/unittests/Tools/CMakeLists.txt deleted file mode 100644 index a97588d928668..0000000000000 --- a/mlir/unittests/Tools/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(lsp-server-support) diff --git a/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt b/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt deleted file mode 100644 index c539c9bc5101f..0000000000000 --- a/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_mlir_unittest(MLIRLspServerSupportTests - Protocol.cpp - Transport.cpp -) -mlir_target_link_libraries(MLIRLspServerSupportTests - PRIVATE - MLIRLspServerSupportLib) From 7628abdb87ccb18703d53cea014456bf764faa2a Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Thu, 11 Sep 2025 10:23:29 -0700 Subject: [PATCH 002/734] [scudo] Add tracing framework (#156112) Add a methodology to allow tracing. By default, this is disabled, but it can be enabled for any OS that supports it. Currently, only releaseToOSXXX functions have trace points added. 
--- compiler-rt/lib/scudo/standalone/combined.h | 4 +- compiler-rt/lib/scudo/standalone/primary32.h | 4 ++ compiler-rt/lib/scudo/standalone/primary64.h | 5 ++ compiler-rt/lib/scudo/standalone/secondary.h | 14 ++++-- compiler-rt/lib/scudo/standalone/tracing.h | 50 ++++++++++++++++++++ 5 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 compiler-rt/lib/scudo/standalone/tracing.h diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 985bfb49884d1..c9ba28a52f780 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -25,6 +25,7 @@ #include "size_class_allocator.h" #include "stack_depot.h" #include "string_utils.h" +#include "tracing.h" #include "tsd.h" #include "scudo/interface.h" @@ -671,10 +672,11 @@ class Allocator { void releaseToOS(ReleaseToOS ReleaseType) { initThreadMaybe(); + SCUDO_SCOPED_TRACE(GetReleaseToOSTraceName(ReleaseType)); if (ReleaseType == ReleaseToOS::ForceAll) drainCaches(); Primary.releaseToOS(ReleaseType); - Secondary.releaseToOS(); + Secondary.releaseToOS(ReleaseType); } // Iterate over all chunks and call a callback for all busy chunks located diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index e2de50b93adc3..49aa74adfc10a 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -511,6 +511,8 @@ uptr SizeClassAllocator32::tryReleaseToOS(uptr ClassId, template uptr SizeClassAllocator32::releaseToOS(ReleaseToOS ReleaseType) { + SCUDO_SCOPED_TRACE(GetPrimaryReleaseToOSTraceName(ReleaseType)); + uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) @@ -1056,6 +1058,8 @@ uptr SizeClassAllocator32::releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, ReleaseToOS ReleaseType) REQUIRES(Sci->Mutex) { + 
SCUDO_SCOPED_TRACE(GetPrimaryReleaseToOSMaybeTraceName(ReleaseType)); + const uptr BlockSize = getSizeByClassId(ClassId); DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 3cb040c514eda..7727049426b47 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -22,6 +22,7 @@ #include "stats.h" #include "string_utils.h" #include "thread_annotations.h" +#include "tracing.h" namespace scudo { @@ -1307,6 +1308,8 @@ uptr SizeClassAllocator64::tryReleaseToOS(uptr ClassId, template uptr SizeClassAllocator64::releaseToOS(ReleaseToOS ReleaseType) { + SCUDO_SCOPED_TRACE(GetPrimaryReleaseToOSTraceName(ReleaseType)); + uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) @@ -1376,6 +1379,8 @@ uptr SizeClassAllocator64::releaseToOSMaybe(RegionInfo *Region, uptr ClassId, ReleaseToOS ReleaseType) REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + SCUDO_SCOPED_TRACE(GetPrimaryReleaseToOSMaybeTraceName(ReleaseType)); + const uptr BlockSize = getSizeByClassId(ClassId); uptr BytesInFreeList; const uptr AllocatedUserEnd = diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 38c9a9e6e2d70..f0b7bceb010f0 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -19,6 +19,7 @@ #include "stats.h" #include "string_utils.h" #include "thread_annotations.h" +#include "tracing.h" #include "vector.h" namespace scudo { @@ -118,7 +119,7 @@ template class MapAllocatorNoCache { bool canCache(UNUSED uptr Size) { return false; } void disable() {} void enable() {} - void releaseToOS() {} + void releaseToOS(ReleaseToOS) {} void disableMemoryTagging() {} void unmapTestOnly() {} bool setOption(Option O, UNUSED sptr Value) { @@ -351,6 +352,9 @@ class 
MapAllocatorCache { // same time will not actually release any extra elements. Therefore, // let any other thread continue, skipping the release. if (Mutex.tryLock()) { + SCUDO_SCOPED_TRACE( + GetSecondaryReleaseToOSTraceName(ReleaseToOS::Normal)); + // TODO: Add ReleaseToOS logic to LRU algorithm releaseOlderThan(Time - static_cast(Interval) * 1000000); Mutex.unlock(); @@ -499,7 +503,9 @@ class MapAllocatorCache { return true; } - void releaseToOS() EXCLUDES(Mutex) { + void releaseToOS([[maybe_unused]] ReleaseToOS ReleaseType) EXCLUDES(Mutex) { + SCUDO_SCOPED_TRACE(GetSecondaryReleaseToOSTraceName(ReleaseType)); + // Since this is a request to release everything, always wait for the // lock so that we guarantee all entries are released after this call. ScopedLock L(Mutex); @@ -574,6 +580,8 @@ class MapAllocatorCache { } void releaseOlderThan(u64 Time) REQUIRES(Mutex) { + SCUDO_SCOPED_TRACE(GetSecondaryReleaseOlderThanTraceName()); + if (!LRUEntries.size() || OldestTime == 0 || OldestTime > Time) return; OldestTime = 0; @@ -669,7 +677,7 @@ template class MapAllocator { bool setOption(Option O, sptr Value) { return Cache.setOption(O, Value); } - void releaseToOS() { Cache.releaseToOS(); } + void releaseToOS(ReleaseToOS ReleaseType) { Cache.releaseToOS(ReleaseType); } void disableMemoryTagging() { Cache.disableMemoryTagging(); } diff --git a/compiler-rt/lib/scudo/standalone/tracing.h b/compiler-rt/lib/scudo/standalone/tracing.h new file mode 100644 index 0000000000000..ac1f746128823 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/tracing.h @@ -0,0 +1,50 @@ +//===-- tracing.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_TRACING_H_ +#define SCUDO_TRACING_H_ + +#if defined(SCUDO_ENABLE_TRACING) + +// This file must include definitions for all of the functions below. +#include "custom_scudo_tracing.h" + +#else + +// Should start a trace in the given scope, and end the trace when going out of +// scope. +#define SCUDO_SCOPED_TRACE(Name) + +// Create a trace name for the call to releaseToOS. +static inline const char *GetReleaseToOSTraceName(scudo::ReleaseToOS) { + return nullptr; +} + +// Create a trace name for the call to releaseToOSMaybe in the primary. +static inline const char * +GetPrimaryReleaseToOSMaybeTraceName(scudo::ReleaseToOS) { + return nullptr; +} + +static inline const char *GetPrimaryReleaseToOSTraceName(scudo::ReleaseToOS) { + return nullptr; +} + +// Create a trace name for the call to releaseToOS in the secondary. +static inline const char *GetSecondaryReleaseToOSTraceName(scudo::ReleaseToOS) { + return nullptr; +} + +// Create a trace name for the call to releaseOlderThan in the secondary. 
+static inline const char *GetSecondaryReleaseOlderThanTraceName() { + return nullptr; +} + +#endif + +#endif // SCUDO_TRACING_H_ From 299ba5dae13cfcad86e9c6f11b88178509d98e80 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 11 Sep 2025 19:40:10 +0200 Subject: [PATCH 003/734] [SupportLSP] Fix dependency on Support --- llvm/lib/Support/LSP/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/LSP/CMakeLists.txt b/llvm/lib/Support/LSP/CMakeLists.txt index 6094d9ac315c0..6bc9d636fbdfe 100644 --- a/llvm/lib/Support/LSP/CMakeLists.txt +++ b/llvm/lib/Support/LSP/CMakeLists.txt @@ -3,6 +3,6 @@ add_llvm_component_library(LLVMSupportLSP Transport.cpp Logging.cpp - DEPENDS - LLVMSupport + LINK_COMPONENTS + Support ) From 8c0f3b6e8f8db76e4ef47f38fb7b32ba9be1913b Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov <99913765+grigorypas@users.noreply.github.com> Date: Thu, 11 Sep 2025 10:41:11 -0700 Subject: [PATCH 004/734] [BOLT] Fix debug line emission for functions in multiple compilation units (#151230) This patch fixes a bug in BOLT's debug line emission where functions that belong to multiple compilation units (such as inline functions in header files) were not handled correctly. Previously, BOLT incorrectly assumed that a binary function could belong to only one compilation unit, leading to incomplete or incorrect debug line information. ### **Problem** When a function appears in multiple compilation units (common scenarios include): * Template instantiated functions * Inline functions defined in header files included by multiple source files BOLT would only emit debug line information for one compilation unit, losing debug information for other CUs where the function was compiled. This resulted in incomplete debugging information and could cause debuggers to fail to set breakpoints or show incorrect source locations. 
### **Root Cause** The issue was in BOLT's assumption that each binary function maps to exactly one compilation unit. However, when the same function (e.g., an inline function from a header) is compiled into multiple object files, it legitimately belongs to multiple CUs in the final binary. --- bolt/include/bolt/Core/BinaryContext.h | 6 + bolt/include/bolt/Core/BinaryFunction.h | 26 +- bolt/include/bolt/Core/DebugData.h | 113 ++++++-- bolt/lib/Core/BinaryContext.cpp | 55 ++-- bolt/lib/Core/BinaryEmitter.cpp | 200 +++++++++------ bolt/lib/Core/BinaryFunction.cpp | 55 ++-- bolt/lib/Core/DebugData.cpp | 2 - bolt/test/X86/multi-cu-debug-line.s | 327 ++++++++++++++++++++++++ bolt/test/lit.cfg.py | 1 + bolt/test/process-debug-line | 105 ++++++++ bolt/unittests/Core/CMakeLists.txt | 1 + bolt/unittests/Core/ClusteredRows.cpp | 141 ++++++++++ 12 files changed, 873 insertions(+), 159 deletions(-) create mode 100644 bolt/test/X86/multi-cu-debug-line.s create mode 100755 bolt/test/process-debug-line create mode 100644 bolt/unittests/Core/ClusteredRows.cpp diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 91ecf89da618c..72c8817daa714 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -288,6 +288,12 @@ class BinaryContext { /// overwritten, but it is okay to re-generate debug info for them. std::set ProcessedCUs; + /// DWARF-related container to manage lifecycle of groups of rows from line + /// tables associated with instructions. Since binary functions can span + /// multiple compilation units, instructions may reference debug line + /// information from multiple CUs. 
+ ClusteredRowsContainer ClusteredRows; + // Setup MCPlus target builder void initializeTarget(std::unique_ptr TargetBuilder) { MIB = std::move(TargetBuilder); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index b59926cc75571..51b139a15e1a0 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -35,6 +35,7 @@ #include "bolt/Core/JumpTable.h" #include "bolt/Core/MCPlus.h" #include "bolt/Utils/NameResolver.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -423,8 +424,9 @@ class BinaryFunction { /// Original LSDA type encoding unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit}; - /// Containing compilation unit for the function. - DWARFUnit *DwarfUnit{nullptr}; + /// All compilation units this function belongs to. + /// Maps DWARF unit offset to the unit pointer. + DenseMap DwarfUnitMap; /// Last computed hash value. Note that the value could be recomputed using /// different parameters by every pass. @@ -2409,15 +2411,21 @@ class BinaryFunction { void computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const; - void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; } + void addDWARFUnit(DWARFUnit *Unit) { DwarfUnitMap[Unit->getOffset()] = Unit; } - /// Return DWARF compile unit for this function. - DWARFUnit *getDWARFUnit() const { return DwarfUnit; } + void removeDWARFUnit(DWARFUnit *Unit) { + DwarfUnitMap.erase(Unit->getOffset()); + } + + /// Return DWARF compile units for this function. + /// Returns a reference to the map of DWARF unit offsets to units. + const DenseMap &getDWARFUnits() const { + return DwarfUnitMap; + } - /// Return line info table for this function. - const DWARFDebugLine::LineTable *getDWARFLineTable() const { - return getDWARFUnit() ? 
BC.DwCtx->getLineTableForUnit(getDWARFUnit()) - : nullptr; + const DWARFDebugLine::LineTable * + getDWARFLineTableForUnit(DWARFUnit *Unit) const { + return BC.DwCtx->getLineTableForUnit(Unit); } /// Finalize profile for the function. diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 6ea3b1af1024f..814978965ce3a 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -135,8 +135,6 @@ struct DebugLineTableRowRef { uint32_t DwCompileUnitIndex; uint32_t RowIndex; - const static DebugLineTableRowRef NULL_ROW; - bool operator==(const DebugLineTableRowRef &Rhs) const { return DwCompileUnitIndex == Rhs.DwCompileUnitIndex && RowIndex == Rhs.RowIndex; @@ -145,24 +143,6 @@ struct DebugLineTableRowRef { bool operator!=(const DebugLineTableRowRef &Rhs) const { return !(*this == Rhs); } - - static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) { - union { - decltype(Loc.getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ptr = Loc.getPointer(); - return U.Ref; - } - - SMLoc toSMLoc() const { - union { - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ref = *this; - return SMLoc::getFromPointer(U.Ptr); - } }; /// Common buffer vector used for debug info handling. @@ -210,7 +190,7 @@ class DebugRangesSectionWriter { static bool classof(const DebugRangesSectionWriter *Writer) { return Writer->getKind() == RangesWriterKind::DebugRangesWriter; } - + /// Append a range to the main buffer. void appendToRangeBuffer(const DebugBufferVector &CUBuffer); @@ -852,6 +832,97 @@ class DwarfLineTable { // Returns DWARF Version for this line table. uint16_t getDwarfVersion() const { return DwarfVersion; } }; + +/// ClusteredRows represents a collection of debug line table row references. +/// +/// MEMORY LAYOUT AND DESIGN: +/// This class uses a flexible array member pattern to store all +/// DebugLineTableRowRef elements in a single contiguous memory allocation. 
+/// The memory layout is: +/// +/// +------------------+ +/// | ClusteredRows | <- Object header (Size + first element) +/// | - Size | +/// | - Rows (element) | <- First DebugLineTableRowRef element +/// +------------------+ +/// | element[1] | <- Additional DebugLineTableRowRef elements +/// | element[2] | stored immediately after the object +/// | ... | +/// | element[Size-1] | +/// +------------------+ +/// +/// The 'Rows' member serves as both the first element storage and the base +/// address for pointer arithmetic to access subsequent elements. +class ClusteredRows { +public: + ArrayRef getRows() const { + return ArrayRef(beginPtrConst(), Size); + } + + /// Returns the number of elements in the array. + uint64_t size() const { return Size; } + + /// We re-purpose SMLoc inside MCInst to store the pointer + /// to ClusteredRows. fromSMLoc() and toSMLoc() are helper + /// functions to convert between SMLoc and ClusteredRows. + + static const ClusteredRows *fromSMLoc(const SMLoc &Loc) { + return reinterpret_cast(Loc.getPointer()); + } + SMLoc toSMLoc() const { + return SMLoc::getFromPointer(reinterpret_cast(this)); + } + + /// Given a vector of DebugLineTableRowRef, this method + /// copies the elements into pre-allocated memory. + template void populate(const T Vec) { + assert(Vec.size() == Size && "Sizes must match"); + DebugLineTableRowRef *CurRawPtr = beginPtr(); + for (DebugLineTableRowRef RowRef : Vec) { + *CurRawPtr = RowRef; + ++CurRawPtr; + } + } + +private: + uint64_t Size; + DebugLineTableRowRef Rows; + + ClusteredRows(uint64_t Size) : Size(Size) {} + + /// Total size of the object including the array. 
+ static uint64_t getTotalSize(uint64_t Size) { + assert(Size > 0 && "Size must be greater than 0"); + return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef); + } + const DebugLineTableRowRef *beginPtrConst() const { + return reinterpret_cast(&Rows); + } + DebugLineTableRowRef *beginPtr() { + return reinterpret_cast(&Rows); + } + + friend class ClusteredRowsContainer; +}; + +/// ClusteredRowsContainer manages the lifecycle of ClusteredRows objects. +class ClusteredRowsContainer { +public: + ClusteredRows *createClusteredRows(uint64_t Size) { + auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size))) + ClusteredRows(Size); + Clusters.push_back(CR); + return CR; + } + ~ClusteredRowsContainer() { + for (auto *CR : Clusters) + std::free(CR); + } + +private: + std::vector Clusters; +}; + } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 23a5a65c2c5f0..6d16edfff73d1 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1693,22 +1693,39 @@ void BinaryContext::preprocessDebugInfo() { auto It = llvm::partition_point( AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); - if (It != AllRanges.end() && It->LowPC <= FunctionAddress) - Function.setDWARFUnit(It->Unit); + if (It == AllRanges.end() || It->LowPC > FunctionAddress) { + continue; + } + Function.addDWARFUnit(It->Unit); + + // Go forward and add all units from ranges that cover the function. + while (++It != AllRanges.end()) { + if (It->LowPC > FunctionAddress || FunctionAddress >= It->HighPC) + break; + Function.addDWARFUnit(It->Unit); + } } // Discover units with debug info that needs to be updated. 
for (const auto &KV : BinaryFunctions) { const BinaryFunction &BF = KV.second; - if (shouldEmit(BF) && BF.getDWARFUnit()) - ProcessedCUs.insert(BF.getDWARFUnit()); + if (shouldEmit(BF) && !BF.getDWARFUnits().empty()) + for (const auto &[_, Unit] : BF.getDWARFUnits()) + ProcessedCUs.insert(Unit); } - // Clear debug info for functions from units that we are not going to process. for (auto &KV : BinaryFunctions) { BinaryFunction &BF = KV.second; - if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) - BF.setDWARFUnit(nullptr); + // Collect units to remove to avoid iterator invalidation + SmallVector UnitsToRemove; + for (const auto &[_, Unit] : BF.getDWARFUnits()) { + if (!ProcessedCUs.count(Unit)) + UnitsToRemove.push_back(Unit); + } + // Remove the collected units + for (auto *Unit : UnitsToRemove) { + BF.removeDWARFUnit(Unit); + } } if (opts::Verbosity >= 1) { @@ -1903,23 +1920,23 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const { static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, const BinaryFunction *Function, DWARFContext *DwCtx) { - DebugLineTableRowRef RowRef = - DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); - if (RowRef == DebugLineTableRowRef::NULL_ROW) + const ClusteredRows *LineTableRows = + ClusteredRows::fromSMLoc(Instruction.getLoc()); + if (LineTableRows == nullptr) return; - const DWARFDebugLine::LineTable *LineTable; - if (Function && Function->getDWARFUnit() && - Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { - LineTable = Function->getDWARFLineTable(); - } else { - LineTable = DwCtx->getLineTableForUnit( - DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); - } - assert(LineTable && "line table expected for instruction with debug info"); + // File name and line number should be the same for all CUs. + // So it is sufficient to check the first one. 
+ DebugLineTableRowRef RowRef = LineTableRows->getRows().front(); + const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit( + DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); + + if (!LineTable) + return; const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; + if (std::optional FName = dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name)) FileName = *FName; diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 7b5cd276fee89..7aaf721da9769 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -177,7 +177,8 @@ class BinaryEmitter { /// Note that it does not automatically result in the insertion of the EOS /// marker in the line table program, but provides one to the DWARF generator /// when it needs it. - void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol); + void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol, + const DWARFUnit &Unit); /// Emit debug line info for unprocessed functions from CUs that include /// emitted functions. @@ -436,8 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitELFSize(StartSymbol, SizeExpr); } - if (opts::UpdateDebugSections && Function.getDWARFUnit()) - emitLineInfoEnd(Function, EndSymbol); + if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty()) + for (const auto &[_, Unit] : Function.getDWARFUnits()) + emitLineInfoEnd(Function, EndSymbol, *Unit); // Exception handling info for the function. emitLSDA(Function, FF); @@ -486,7 +488,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, // A symbol to be emitted before the instruction to mark its location. 
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr); - if (opts::UpdateDebugSections && BF.getDWARFUnit()) { + if (opts::UpdateDebugSections && !BF.getDWARFUnits().empty()) { LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen, FirstInstr, InstrLabel); FirstInstr = false; @@ -679,74 +681,100 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc, bool FirstInstr, MCSymbol *&InstrLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable(); - assert(FunctionCU && "cannot emit line info for function without CU"); - - DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc); - - // Check if no new line info needs to be emitted. - if (RowReference == DebugLineTableRowRef::NULL_ROW || + if (NewLoc.getPointer() == nullptr || NewLoc.getPointer() == PrevLoc.getPointer()) return PrevLoc; + const ClusteredRows *Cluster = ClusteredRows::fromSMLoc(NewLoc); + + auto addToLineTable = [&](DebugLineTableRowRef RowReference, + const DWARFUnit &TargetCU, unsigned Flags, + MCSymbol &InstrLabel, + const DWARFDebugLine::Row &CurrentRow) { + const uint64_t TargetUnitIndex = TargetCU.getOffset(); + unsigned TargetFilenum = CurrentRow.File; + const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; + // If the CU id from the current instruction location does not + // match the target CU id, it means that we have come across some + // inlined code (by BOLT). We must look up the CU for the instruction's + // original function and get the line table from that. + if (TargetUnitIndex != CurrentUnitIndex) { + // Add filename from the inlined function to the current CU. 
+ TargetFilenum = BC.addDebugFilenameToUnit( + TargetUnitIndex, CurrentUnitIndex, CurrentRow.File); + } + BC.Ctx->setCurrentDwarfLoc(TargetFilenum, CurrentRow.Line, + CurrentRow.Column, Flags, CurrentRow.Isa, + CurrentRow.Discriminator); + const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); + BC.Ctx->clearDwarfLocSeen(); + const MCLineSection::MCLineDivisionMap &MapLineEntries = + BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .getMCLineEntries(); + const auto *It = MapLineEntries.find(Streamer.getCurrentSectionOnly()); + MCDwarfLineEntry NewLineEntry = MCDwarfLineEntry(&InstrLabel, DwarfLoc); + + // Check if line table exists and has entries before doing comparison. + if (It != MapLineEntries.end() && !It->second.empty()) { + // Check if the new line entry has the same debug info as the last one + // to avoid duplicates. We don't compare labels since different + // instructions can have the same line info. + const auto &LastEntry = It->second.back(); + if (LastEntry.getFileNum() == NewLineEntry.getFileNum() && + LastEntry.getLine() == NewLineEntry.getLine() && + LastEntry.getColumn() == NewLineEntry.getColumn() && + LastEntry.getFlags() == NewLineEntry.getFlags() && + LastEntry.getIsa() == NewLineEntry.getIsa() && + LastEntry.getDiscriminator() == NewLineEntry.getDiscriminator()) + return; + } - unsigned CurrentFilenum = 0; - const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable; - - // If the CU id from the current instruction location does not - // match the CU id from the current function, it means that we - // have come across some inlined code. We must look up the CU - // for the instruction's original function and get the line table - // from that. 
- const uint64_t FunctionUnitIndex = FunctionCU->getOffset(); - const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; - if (CurrentUnitIndex != FunctionUnitIndex) { - CurrentLineTable = BC.DwCtx->getLineTableForUnit( - BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex)); - // Add filename from the inlined function to the current CU. - CurrentFilenum = BC.addDebugFilenameToUnit( - FunctionUnitIndex, CurrentUnitIndex, - CurrentLineTable->Rows[RowReference.RowIndex - 1].File); - } - - const DWARFDebugLine::Row &CurrentRow = - CurrentLineTable->Rows[RowReference.RowIndex - 1]; - if (!CurrentFilenum) - CurrentFilenum = CurrentRow.File; - - unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | - (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) | - (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | - (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); - - // Always emit is_stmt at the beginning of function fragment. - if (FirstInstr) - Flags |= DWARF2_FLAG_IS_STMT; - - BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column, - Flags, CurrentRow.Isa, CurrentRow.Discriminator); - const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); - BC.Ctx->clearDwarfLocSeen(); + BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .addLineEntry(NewLineEntry, Streamer.getCurrentSectionOnly()); + }; if (!InstrLabel) InstrLabel = BC.Ctx->createTempSymbol(); - - BC.getDwarfLineTable(FunctionUnitIndex) - .getMCLineSections() - .addLineEntry(MCDwarfLineEntry(InstrLabel, DwarfLoc), - Streamer.getCurrentSectionOnly()); + for (DebugLineTableRowRef RowReference : Cluster->getRows()) { + const DWARFDebugLine::LineTable *CurrentLineTable = + BC.DwCtx->getLineTableForUnit( + BC.DwCtx->getCompileUnitForOffset(RowReference.DwCompileUnitIndex)); + const DWARFDebugLine::Row &CurrentRow = + CurrentLineTable->Rows[RowReference.RowIndex - 1]; + unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | + (DWARF2_FLAG_BASIC_BLOCK * 
CurrentRow.BasicBlock) | + (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | + (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); + + // Always emit is_stmt at the beginning of function fragment. + if (FirstInstr) + Flags |= DWARF2_FLAG_IS_STMT; + const auto &FunctionDwarfUnits = BF.getDWARFUnits(); + auto It = FunctionDwarfUnits.find(RowReference.DwCompileUnitIndex); + if (It != FunctionDwarfUnits.end()) { + addToLineTable(RowReference, *It->second, Flags, *InstrLabel, CurrentRow); + continue; + } + // This rows is from CU that did not contain the original function. + // This might happen if BOLT moved/inlined that instruction from other CUs. + // In this case, we need to insert it to all CUs that the function + // originally beloned to. + for (const auto &[_, Unit] : BF.getDWARFUnits()) { + addToLineTable(RowReference, *Unit, Flags, *InstrLabel, CurrentRow); + } + } return NewLoc; } void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF, - MCSymbol *FunctionEndLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - assert(FunctionCU && "DWARF unit expected"); + MCSymbol *FunctionEndLabel, + const DWARFUnit &Unit) { BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0); const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); BC.Ctx->clearDwarfLocSeen(); - BC.getDwarfLineTable(FunctionCU->getOffset()) + BC.getDwarfLineTable(Unit.getOffset()) .getMCLineSections() .addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc), Streamer.getCurrentSectionOnly()); @@ -1115,36 +1143,40 @@ void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() { if (Function.isEmitted()) continue; - const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable(); - if (!LineTable) - continue; // nothing to update for this function + // Loop through all CUs in the function + for (const auto &[_, Unit] : Function.getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + Function.getDWARFLineTableForUnit(Unit); + if (!LineTable) + 
continue; // nothing to update for this unit + + const uint64_t Address = Function.getAddress(); + std::vector Results; + if (!LineTable->lookupAddressRange( + {Address, object::SectionedAddress::UndefSection}, + Function.getSize(), Results)) + continue; - const uint64_t Address = Function.getAddress(); - std::vector Results; - if (!LineTable->lookupAddressRange( - {Address, object::SectionedAddress::UndefSection}, - Function.getSize(), Results)) - continue; + if (Results.empty()) + continue; - if (Results.empty()) - continue; + // The first row returned could be the last row matching the start + // address. Find the first row with the same address that is not the end + // of the sequence. + uint64_t FirstRow = Results.front(); + while (FirstRow > 0) { + const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; + if (PrevRow.Address.Address != Address || PrevRow.EndSequence) + break; + --FirstRow; + } - // The first row returned could be the last row matching the start address. - // Find the first row with the same address that is not the end of the - // sequence. 
- uint64_t FirstRow = Results.front(); - while (FirstRow > 0) { - const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; - if (PrevRow.Address.Address != Address || PrevRow.EndSequence) - break; - --FirstRow; + const uint64_t EndOfSequenceAddress = + Function.getAddress() + Function.getMaxSize(); + BC.getDwarfLineTable(Unit->getOffset()) + .addLineTableSequence(LineTable, FirstRow, Results.back(), + EndOfSequenceAddress); } - - const uint64_t EndOfSequenceAddress = - Function.getAddress() + Function.getMaxSize(); - BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset()) - .addLineTableSequence(LineTable, FirstRow, Results.back(), - EndOfSequenceAddress); } // For units that are completely unprocessed, use original debug line contents diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 6cac2d0cca2cb..578a87dc6c09d 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -179,37 +179,29 @@ template static bool emptyRange(const R &Range) { } /// Gets debug line information for the instruction located at the given -/// address in the original binary. The SMLoc's pointer is used -/// to point to this information, which is represented by a -/// DebugLineTableRowRef. The returned pointer is null if no debug line -/// information for this instruction was found. -static SMLoc findDebugLineInformationForInstructionAt( +/// address in the original binary. Returns an optional DebugLineTableRowRef +/// that references the corresponding row in the DWARF line table. Since binary +/// functions can span multiple compilation units, this function helps +/// associate instructions with their debug line information from the +/// appropriate CU. Returns std::nullopt if no debug line information for +/// this instruction was found. 
+static std::optional +findDebugLineInformationForInstructionAt( uint64_t Address, DWARFUnit *Unit, const DWARFDebugLine::LineTable *LineTable) { - // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef, - // which occupies 64 bits. Thus, we can only proceed if the struct fits into - // the pointer itself. - static_assert( - sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef), - "Cannot fit instruction debug line information into SMLoc's pointer"); - - SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc(); uint32_t RowIndex = LineTable->lookupAddress( {Address, object::SectionedAddress::UndefSection}); if (RowIndex == LineTable->UnknownRowIndex) - return NullResult; + return std::nullopt; assert(RowIndex < LineTable->Rows.size() && "Line Table lookup returned invalid index."); - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef *InstructionLocation = - reinterpret_cast(&Ptr); - - InstructionLocation->DwCompileUnitIndex = Unit->getOffset(); - InstructionLocation->RowIndex = RowIndex + 1; + DebugLineTableRowRef InstructionLocation; + InstructionLocation.DwCompileUnitIndex = Unit->getOffset(); + InstructionLocation.RowIndex = RowIndex + 1; - return SMLoc::getFromPointer(Ptr); + return InstructionLocation; } static std::string buildSectionName(StringRef Prefix, StringRef Name, @@ -1496,9 +1488,24 @@ Error BinaryFunction::disassemble() { } add_instruction: - if (getDWARFLineTable()) { - Instruction.setLoc(findDebugLineInformationForInstructionAt( - AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable())); + if (!getDWARFUnits().empty()) { + SmallVector Rows; + for (const auto &[_, Unit] : getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + getDWARFLineTableForUnit(Unit); + if (!LineTable) + continue; + if (std::optional RowRef = + findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, + Unit, LineTable)) + Rows.emplace_back(*RowRef); + } + if (!Rows.empty()) { + ClusteredRows *Cluster = + 
BC.ClusteredRows.createClusteredRows(Rows.size()); + Cluster->populate(Rows); + Instruction.setLoc(Cluster->toSMLoc()); + } } // Record offset of the instruction for profile matching. diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index 521eb8d91bbc0..e05f28f08572c 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -101,8 +101,6 @@ std::optional findAttributeInfo(const DWARFDie DIE, return findAttributeInfo(DIE, AbbrevDecl, *Index); } -const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0}; - LLVM_ATTRIBUTE_UNUSED static void printLE64(const std::string &S) { for (uint32_t I = 0, Size = S.size(); I < Size; ++I) { diff --git a/bolt/test/X86/multi-cu-debug-line.s b/bolt/test/X86/multi-cu-debug-line.s new file mode 100644 index 0000000000000..15f49a211e58b --- /dev/null +++ b/bolt/test/X86/multi-cu-debug-line.s @@ -0,0 +1,327 @@ +## Test that BOLT correctly handles debug line information for functions +## that belong to multiple compilation units (e.g., inline functions in +## common header files). This is the assembly version of the multi-cu-debug-line.test. +## The test covers two scenarios: +## 1. Normal processing: .debug_line section shows lines for the function +## in all CUs where it was compiled, with no duplicate rows within CUs +## 2. 
Functions not processed: When BOLT doesn't process functions (using +## --funcs with nonexistent function), original debug info is preserved + +# REQUIRES: system-linux + +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/multi-cu-file1.s -o %t/multi-cu-file1.o +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/multi-cu-file2.s -o %t/multi-cu-file2.o +# RUN: %clang %cflags %t/multi-cu-file1.o %t/multi-cu-file2.o -o %t.exe -Wl,-q + +## Test 1: Normal BOLT processing (functions are processed/optimized) +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-line %t.bolt > %t.debug-line.txt +# RUN: FileCheck %s --check-prefix=BASIC --input-file %t.debug-line.txt + +## Check that debug line information is present for both compilation units +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file1.c" +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file2.c" + +## Use our helper script to create a normalized table without addresses +# RUN: process-debug-line %t.debug-line.txt > %t.normalized-debug-line.txt +# RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both compilation units +# NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h + +## Verify that we have line entries for the inline function in multiple CUs +## by checking that the header file appears multiple times in different contexts +# RUN: grep -c "multi-cu-common.h" %t.debug-line.txt > 
%t.header-count.txt +# RUN: FileCheck %s --check-prefix=MULTI-CU --input-file %t.header-count.txt + +## The header should appear in debug line info for multiple CUs +# MULTI-CU: {{[2-9]|[1-9][0-9]+}} + +## Check that there are no duplicate line table rows within the same CU +## This verifies the fix for the bug where duplicate entries were created +# RUN: sort %t.normalized-debug-line.txt | uniq -c | \ +# RUN: awk '$1 > 1 {print "DUPLICATE_ROW: " $0}' > %t.duplicates.txt +# RUN: FileCheck %s --check-prefix=NO-DUPLICATES --input-file %t.duplicates.txt --allow-empty + +## Should have no duplicate normalized rows (file should be empty) +## Note: Cross-CU duplicates are expected and valid (same function in different CUs) +## but within-CU duplicates would indicate a bug +# NO-DUPLICATES-NOT: DUPLICATE_ROW + +## Test 2: Functions not processed by BOLT (using --funcs with nonexistent function) +## This tests the code path where BOLT preserves original debug info +# RUN: llvm-bolt %t.exe -o %t.not-emitted.bolt --update-debug-sections --funcs=nonexistent_function +# RUN: llvm-dwarfdump --debug-line %t.not-emitted.bolt > %t.not-emitted.debug-line.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-BASIC --input-file %t.not-emitted.debug-line.txt + +## Check that debug line information is still present for both compilation units when functions aren't processed +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file1.c" +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c" + +## Create normalized output for the not-emitted case +# RUN: process-debug-line %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both 
compilation units (preserved from original) +# PRESERVED-NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h + +## Verify that we have line entries for the inline function in multiple CUs (preserved) +## by checking that the header file appears multiple times in different contexts +# RUN: grep -c "multi-cu-common.h" %t.not-emitted.debug-line.txt > %t.preserved-header-count.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-MULTI-CU --input-file %t.preserved-header-count.txt + +## The header should appear in debug line info for multiple CUs (preserved from original) +# PRESERVED-MULTI-CU: {{[2-9]|[1-9][0-9]+}} + +## Check that original debug info is preserved for main functions +# RUN: grep "multi-cu-file1.c.*multi-cu-file1.c" %t.not-emitted.normalized.txt > %t.preserved-main.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-MAIN --input-file %t.preserved-main.txt + +# PRESERVED-MAIN: multi-cu-file1.c {{[0-9]+}} {{[0-9]+}} multi-cu-file1.c + +## Check that original debug info is preserved for file2 functions +# RUN: grep "multi-cu-file2.c.*multi-cu-file2.c" %t.not-emitted.normalized.txt > %t.preserved-file2.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-FILE2 --input-file %t.preserved-file2.txt + +# PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c + +;--- multi-cu-file1.s + .text + .file 1 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-file1.c" + .file 2 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-common.h" + + .globl main + .type main,@function +main: +.Lfunc_begin0: + .loc 1 4 0 + callq common_inline_function + .loc 1 8 0 + retq +.Lfunc_end0: + .size main, 
.Lfunc_end0-main + + .type common_inline_function,@function +common_inline_function: +.Lfunc_begin1: + .loc 2 5 0 + movl $42, %eax + .loc 2 6 0 + addl $10, %eax + .loc 2 7 0 + retq +.Lfunc_end1: + .size common_inline_function, .Lfunc_end1-common_inline_function + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x30 DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 29 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .byte 2 # Abbrev [2] 0x3a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .long .Linfo_string4 # DW_AT_name + .byte 2 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 18.0.0" +.Linfo_string1: + .asciz "/repo/llvm-project/bolt/test/Inputs/multi-cu-file1.c" +.Linfo_string2: + .asciz "/repo/llvm-project" +.Linfo_string3: + .asciz "main" +.Linfo_string4: + .asciz "common_inline_function" + + .section .debug_line,"",@progbits +.Lline_table_start0: + +;--- multi-cu-file2.s + .text + .file 1 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-file2.c" + .file 2 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-common.h" + + .globl helper_function + .type helper_function,@function +helper_function: +.Lfunc_begin0: + .loc 1 4 0 + callq common_inline_function + .loc 1 8 0 + retq +.Lfunc_end0: + .size helper_function, .Lfunc_end0-helper_function + + .type common_inline_function,@function +common_inline_function: +.Lfunc_begin1: + .loc 2 5 0 + movl $42, %eax + .loc 2 6 0 + addl $10, %eax + .loc 2 7 0 + retq +.Lfunc_end1: + .size common_inline_function, .Lfunc_end1-common_inline_function + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 
1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x30 DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 29 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .byte 2 # Abbrev [2] 0x3a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .long .Linfo_string4 # DW_AT_name + .byte 2 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 18.0.0" +.Linfo_string1: + .asciz "/repo/llvm-project/bolt/test/Inputs/multi-cu-file2.c" +.Linfo_string2: + .asciz "/repo/llvm-project" +.Linfo_string3: + .asciz "helper_function" +.Linfo_string4: + .asciz "common_inline_function" + + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index bef570ba50a04..3299051db4983 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -138,6 +138,7 @@ unresolved="fatal", extra_args=[link_fdata_cmd], ), + ToolSubst("process-debug-line", unresolved="fatal"), ToolSubst("merge-fdata", unresolved="fatal"), ToolSubst("llvm-readobj", unresolved="fatal"), ToolSubst("llvm-dwp", unresolved="fatal"), diff --git a/bolt/test/process-debug-line b/bolt/test/process-debug-line new file mode 100755 index 0000000000000..44cbcd1e5984a --- /dev/null +++ b/bolt/test/process-debug-line @@ -0,0 +1,105 @@ +#!/bin/sh + +# Script to process llvm-dwarfdump --debug-line output and create a normalized table +# Usage: 
process-debug-line.sh +# +# Output format: CU_FILE LINE COLUMN FILE_NAME [additional_info] +# This strips addresses to make rows unique and adds context about which CU and file each line belongs to + +if [ $# -ne 1 ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +debug_line_file="$1" + +if [ ! -f "$debug_line_file" ]; then + echo "Error: File '$debug_line_file' not found" >&2 + exit 1 +fi + +awk ' +BEGIN { + cu_count = 0 + current_cu_file = "" + # Initialize file names array + for (i = 0; i < 100; i++) { + current_file_names[i] = "" + } +} + +# Track debug_line sections (new CU) +/^debug_line\[/ { + cu_count++ + current_cu_file = "" + # Clear file names array for new CU + for (i = 0; i < 100; i++) { + current_file_names[i] = "" + } + next +} + +# Capture file names and their indices +/^file_names\[.*\]:/ { + # Extract file index using simple string operations + line_copy = $0 + gsub(/file_names\[/, "", line_copy) + gsub(/\]:.*/, "", line_copy) + gsub(/[ \t]/, "", line_copy) + file_index = line_copy + + getline # Read the next line which contains the actual filename + # Extract filename from name: "filename" format + if (match($0, /name:[ \t]*"/)) { + filename = $0 + gsub(/.*name:[ \t]*"/, "", filename) + gsub(/".*/, "", filename) + current_file_names[file_index] = filename + + # Extract basename for main CU file (first .c/.cpp/.cc file we see) + if (current_cu_file == "" && match(filename, /\.(c|cpp|cc)$/)) { + cu_filename = filename + gsub(/.*\//, "", cu_filename) + current_cu_file = cu_filename + } + } + next +} + +# Process line table entries +/^0x[0-9a-f]+/ { + # Parse the line entry: Address Line Column File ISA Discriminator OpIndex Flags + if (NF >= 4) { + line = $2 + column = $3 + file_index = $4 + + # Get the filename for this file index + filename = current_file_names[file_index] + if (filename == "") { + filename = "UNKNOWN_FILE_" file_index + } else { + # Extract just the basename + basename = filename + gsub(/.*\//, "", basename) + filename = basename + 
} + + # Build additional info (flags, etc.) + additional_info = "" + for (i = 8; i <= NF; i++) { + if (additional_info != "") { + additional_info = additional_info " " + } + additional_info = additional_info $i + } + + # Output normalized row: CU_FILE LINE COLUMN FILE_NAME [additional_info] + printf "%s %s %s %s", current_cu_file, line, column, filename + if (additional_info != "") { + printf " %s", additional_info + } + printf "\n" + } +} +' "$debug_line_file" diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt index f10b0d9472067..297dec7449202 100644 --- a/bolt/unittests/Core/CMakeLists.txt +++ b/bolt/unittests/Core/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS add_bolt_unittest(CoreTests BinaryContext.cpp + ClusteredRows.cpp MCPlusBuilder.cpp MemoryMaps.cpp DynoStats.cpp diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp new file mode 100644 index 0000000000000..4665022c91fdd --- /dev/null +++ b/bolt/unittests/Core/ClusteredRows.cpp @@ -0,0 +1,141 @@ +//===- bolt/unittest/Core/ClusteredRows.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/DebugData.h" +#include "llvm/Support/SMLoc.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; +using namespace llvm::bolt; + +namespace { + +class ClusteredRowsTest : public ::testing::Test { +protected: + void SetUp() override { + Container = std::make_unique(); + } + + std::unique_ptr Container; +}; + +TEST_F(ClusteredRowsTest, CreateSingleElement) { + ClusteredRows *CR = Container->createClusteredRows(1); + ASSERT_NE(CR, nullptr); + EXPECT_EQ(CR->size(), 1u); + + // Test population with single element + std::vector TestRefs = {{42, 100}}; + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 1u); + EXPECT_EQ(Rows[0].DwCompileUnitIndex, 42u); + EXPECT_EQ(Rows[0].RowIndex, 100u); +} + +TEST_F(ClusteredRowsTest, CreateMultipleElements) { + ClusteredRows *CR = Container->createClusteredRows(3); + ASSERT_NE(CR, nullptr); + EXPECT_EQ(CR->size(), 3u); + + // Test population with multiple elements + std::vector TestRefs = {{10, 20}, {30, 40}, {50, 60}}; + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 3u); + + EXPECT_EQ(Rows[0].DwCompileUnitIndex, 10u); + EXPECT_EQ(Rows[0].RowIndex, 20u); + + EXPECT_EQ(Rows[1].DwCompileUnitIndex, 30u); + EXPECT_EQ(Rows[1].RowIndex, 40u); + + EXPECT_EQ(Rows[2].DwCompileUnitIndex, 50u); + EXPECT_EQ(Rows[2].RowIndex, 60u); +} + +TEST_F(ClusteredRowsTest, SMLoc_Conversion) { + ClusteredRows *CR = Container->createClusteredRows(2); + ASSERT_NE(CR, nullptr); + + // Test SMLoc conversion + SMLoc Loc = CR->toSMLoc(); + EXPECT_TRUE(Loc.isValid()); + + // Test round-trip conversion + const ClusteredRows *CR2 = ClusteredRows::fromSMLoc(Loc); + EXPECT_EQ(CR, CR2); + EXPECT_EQ(CR2->size(), 2u); +} + +TEST_F(ClusteredRowsTest, PopulateWithArrayRef) { + ClusteredRows *CR = 
Container->createClusteredRows(4); + ASSERT_NE(CR, nullptr); + + // Test population with ArrayRef + DebugLineTableRowRef TestArray[] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}}; + ArrayRef TestRefs(TestArray, 4); + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 4u); + + for (size_t i = 0; i < 4; ++i) { + EXPECT_EQ(Rows[i].DwCompileUnitIndex, TestArray[i].DwCompileUnitIndex); + EXPECT_EQ(Rows[i].RowIndex, TestArray[i].RowIndex); + } +} + +TEST_F(ClusteredRowsTest, MultipleClusteredRows) { + // Test creating multiple ClusteredRows objects + ClusteredRows *CR1 = Container->createClusteredRows(2); + ClusteredRows *CR2 = Container->createClusteredRows(3); + ClusteredRows *CR3 = Container->createClusteredRows(1); + + ASSERT_NE(CR1, nullptr); + ASSERT_NE(CR2, nullptr); + ASSERT_NE(CR3, nullptr); + + // Ensure they are different objects + EXPECT_NE(CR1, CR2); + EXPECT_NE(CR2, CR3); + EXPECT_NE(CR1, CR3); + + // Verify sizes + EXPECT_EQ(CR1->size(), 2u); + EXPECT_EQ(CR2->size(), 3u); + EXPECT_EQ(CR3->size(), 1u); + + // Populate each with different data + std::vector TestRefs1 = {{100, 200}, {300, 400}}; + std::vector TestRefs2 = {{10, 20}, {30, 40}, {50, 60}}; + std::vector TestRefs3 = {{999, 888}}; + + CR1->populate(TestRefs1); + CR2->populate(TestRefs2); + CR3->populate(TestRefs3); + + // Verify data integrity + ArrayRef Rows1 = CR1->getRows(); + ArrayRef Rows2 = CR2->getRows(); + ArrayRef Rows3 = CR3->getRows(); + + EXPECT_EQ(Rows1[0].DwCompileUnitIndex, 100u); + EXPECT_EQ(Rows1[1].RowIndex, 400u); + + EXPECT_EQ(Rows2[1].DwCompileUnitIndex, 30u); + EXPECT_EQ(Rows2[2].RowIndex, 60u); + + EXPECT_EQ(Rows3[0].DwCompileUnitIndex, 999u); + EXPECT_EQ(Rows3[0].RowIndex, 888u); +} + +} // namespace From 2fca446779333f540b1a582f4a8cbc14744e8e18 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 11 Sep 2025 17:41:40 +0000 Subject: [PATCH 005/734] [msan] Handle AVX512 pack with saturation intrinsics (#157984) Approximately handle 
avx512_{packssdw/packsswb/packusdw/packuswb} with the existing handleVectorPackIntrinsic(), instead of relying on the default (strict) handler. --- .../Instrumentation/MemorySanitizer.cpp | 21 + .../X86/avx512bw-intrinsics-upgrade.ll | 584 +++++++----------- .../X86/avx512bw-intrinsics.ll | 584 +++++++----------- 3 files changed, 453 insertions(+), 736 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 9899a2aae2b15..3ea790ad1839a 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3684,6 +3684,15 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::x86_mmx_packssdw: return Intrinsic::x86_mmx_packssdw; + + case Intrinsic::x86_avx512_packssdw_512: + case Intrinsic::x86_avx512_packusdw_512: + return Intrinsic::x86_avx512_packssdw_512; + + case Intrinsic::x86_avx512_packsswb_512: + case Intrinsic::x86_avx512_packuswb_512: + return Intrinsic::x86_avx512_packsswb_512; + default: llvm_unreachable("unexpected intrinsic id"); } @@ -3696,6 +3705,8 @@ struct MemorySanitizerVisitor : public InstVisitor { // Shadow is propagated with the signed variant of the same intrinsic applied // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer). // MMXEltSizeInBits is used only for x86mmx arguments. 
+ // + // TODO: consider using GetMinMaxUnsigned() to handle saturation precisely void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned MMXEltSizeInBits = 0) { assert(I.arg_size() == 2); @@ -5554,6 +5565,7 @@ struct MemorySanitizerVisitor : public InstVisitor { handleVectorShiftIntrinsic(I, /* Variable */ true); break; + // Pack with Signed/Unsigned Saturation case Intrinsic::x86_sse2_packsswb_128: case Intrinsic::x86_sse2_packssdw_128: case Intrinsic::x86_sse2_packuswb_128: @@ -5562,6 +5574,15 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::x86_avx2_packssdw: case Intrinsic::x86_avx2_packuswb: case Intrinsic::x86_avx2_packusdw: + // e.g., <64 x i8> @llvm.x86.avx512.packsswb.512 + // (<32 x i16> %a, <32 x i16> %b) + // <32 x i16> @llvm.x86.avx512.packssdw.512 + // (<16 x i32> %a, <16 x i32> %b) + // Note: AVX512 masked variants are auto-upgraded by LLVM. + case Intrinsic::x86_avx512_packsswb_512: + case Intrinsic::x86_avx512_packssdw_512: + case Intrinsic::x86_avx512_packuswb_512: + case Intrinsic::x86_avx512_packusdw_512: handleVectorPackIntrinsic(I); break; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll index 51dad35a1edbc..7bd35182d5c90 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll @@ -5,8 +5,6 @@ ; ; Strictly handled: ; - llvm.x86.avx512.dbpsadbw.512 -; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 -; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 ; ; Heuristically handled: ; - llvm.sadd.sat.v32i16, llvm.sadd.sat.v64i8 @@ -2039,19 +2037,14 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) no ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 
add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] ; %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -2064,25 +2057,20 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 
128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> 
[[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP17]], <32 x i16> [[PASSTHRU]] ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP16]] ; @@ -2096,25 +2084,20 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] 
= bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP15]] ; @@ -2138,18 +2121,13 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) nounw ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> 
[[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -2175,22 +2153,17 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: 
[[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2219,22 +2192,17 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> 
@llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2266,18 +2234,13 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) noun ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; 
CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -2309,22 +2272,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; 
CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2359,22 +2317,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable 
-; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2395,19 +2348,14 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nou ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 
[[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] ; %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) ret <64 x i8> %res @@ -2420,25 +2368,20 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], 
[[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP17]], <64 x i8> [[PASSTHRU]] ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP16]] ; @@ -2452,25 +2395,20 @@ define <64 x 
i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> 
zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP15]] ; @@ -2494,18 +2432,13 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwi ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> 
[[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -2531,22 +2464,17 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; 
CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -2575,22 +2503,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; 
CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer @@ -2610,18 +2533,13 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) n ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP8]], <16 x i32> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; 
CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP7]] ; %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) @@ -2635,22 +2553,17 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP17]], <16 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x 
i16> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP6]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] @@ -2667,22 +2580,17 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP16]], <16 x i32> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> 
@llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP5]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer @@ -2709,18 +2617,13 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) noun ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x 
i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -2746,22 +2649,17 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: 
[[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2790,22 +2688,17 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> 
[[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2837,18 +2730,13 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nou ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: 
[[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -2880,22 +2768,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call 
<32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2930,22 +2813,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x 
i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2966,18 +2844,13 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) no ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof 
[[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP8]], <32 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP7]] ; %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) @@ -2991,22 +2864,17 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = 
sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP17]], <32 x i16> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP6]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] @@ -3023,22 +2891,17 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label 
[[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP16]], <32 x i16> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP5]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer @@ -3065,18 +2928,13 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounw ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> 
[[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -3102,22 +2960,17 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; 
CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -3146,22 +2999,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call 
void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll index c6c7e002213bd..8bf6d5acc21ba 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll @@ -7,8 +7,6 @@ ; - llvm.x86.avx512.dbpsadbw.512 ; - llvm.x86.avx512.ktestc.d, llvm.x86.avx512.ktestc.q, llvm.x86.avx512.ktestz.d, llvm.x86.avx512.ktestz.q ; - llvm.x86.avx512.mask.pmov.wb.mem.512 -; - 
llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 -; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 ; - llvm.x86.avx512.psad.bw.512 ; ; Heuristically handled: @@ -295,19 +293,14 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] ; %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x 
i32> %b) ret <32 x i16> %1 @@ -320,25 +313,20 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: 
[[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP17]], <32 x i16> [[PASSTHRU]] ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP16]] ; @@ -354,25 +342,20 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; 
CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP15]] ; @@ -398,18 +381,13 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; 
CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -435,22 +413,17 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: 
[[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -481,22 +454,17 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable 
-; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -530,18 +498,13 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> 
[[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -573,22 +536,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() 
#[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -625,22 +583,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = 
bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -663,19 +616,14 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add 
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] ; %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) ret <64 x i8> %1 @@ -688,25 +636,20 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void 
@llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> 
[[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP17]], <64 x i8> [[PASSTHRU]] ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP16]] ; @@ -722,25 +665,20 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] 
= select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP15]] ; @@ -766,18 +704,13 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], 
zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -803,22 +736,17 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 
[[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -849,22 +777,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> 
@llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer @@ -886,18 +809,13 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) # ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], 
zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP8]], <16 x i32> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP7]] ; %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) @@ -911,22 +829,17 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP17]], <16 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> 
@llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP6]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] @@ -945,22 +858,17 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] 
= icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP16]], <16 x i32> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP5]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer @@ -989,18 +897,13 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; 
CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -1026,22 +929,17 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; 
CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -1072,22 +970,17 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: 
[[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -1121,18 +1014,13 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; 
CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -1164,22 +1052,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x 
i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -1216,22 +1099,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof 
[[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -1254,18 +1132,13 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; 
CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP8]], <32 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP7]] ; %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) @@ -1279,22 +1152,17 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; 
CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP17]], <32 x i16> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP6]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] @@ -1313,22 +1181,17 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; 
CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP16]], <32 x i16> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP5]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer @@ -1357,18 +1220,13 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = 
load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -1394,22 +1252,17 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof 
[[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -1440,22 +1293,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp 
ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer From 5582f0ca1df063e55bd987dfc57392d1d251f4e9 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 11 Sep 2025 17:39:02 +0000 Subject: [PATCH 006/734] [gn build] Manually port a3a25996 --- llvm/utils/gn/secondary/llvm/lib/Support/LSP/BUILD.gn | 9 +++++++++ llvm/utils/gn/secondary/llvm/unittests/BUILD.gn | 1 + 
.../gn/secondary/llvm/unittests/Support/LSP/BUILD.gn | 9 +++++++++ 3 files changed, 19 insertions(+) create mode 100644 llvm/utils/gn/secondary/llvm/lib/Support/LSP/BUILD.gn create mode 100644 llvm/utils/gn/secondary/llvm/unittests/Support/LSP/BUILD.gn diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/LSP/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/LSP/BUILD.gn new file mode 100644 index 0000000000000..c510891dca092 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Support/LSP/BUILD.gn @@ -0,0 +1,9 @@ +static_library("LSP") { + output_name = "LLVMSupportLSP" + deps = [ "//llvm/lib/Support" ] + sources = [ + "Logging.cpp", + "Protocol.cpp", + "Transport.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn index 22026664ae596..9ca6715f155c3 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn @@ -51,6 +51,7 @@ group("unittests") { "SandboxIR:SandboxIRTests", "Support:SupportTests", "Support/DynamicLibrary:DynamicLibraryTests", + "Support/LSP:LSPTests", "TableGen:TableGenTests", "Target:TargetMachineCTests", "TargetParser:TargetParserTests", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/LSP/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/LSP/BUILD.gn new file mode 100644 index 0000000000000..3b59ef1288652 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/LSP/BUILD.gn @@ -0,0 +1,9 @@ +import("//third-party/unittest/unittest.gni") + +unittest("LSPTests") { + deps = [ "//llvm/lib/Support/LSP" ] + sources = [ + "Protocol.cpp", + "Transport.cpp", + ] +} From 71da9288f62affb0b55d2951185a661567015d81 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 11 Sep 2025 17:42:33 +0000 Subject: [PATCH 007/734] [gn build] Add missing deps --- llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 + 2 files changed, 2 
insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn index 83fd0aa8de422..6d85c7fb67477 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn @@ -108,6 +108,7 @@ static_library("LLVMAArch64CodeGen") { "//llvm/lib/CodeGen/SelectionDAG", "//llvm/lib/IR", "//llvm/lib/MC", + "//llvm/lib/Passes", "//llvm/lib/Support", "//llvm/lib/Target", "//llvm/lib/TargetParser", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index 306e4d3f9f6b8..a1f5b475e2096 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -109,6 +109,7 @@ static_library("LLVMRISCVCodeGen") { "//llvm/lib/CodeGen/SelectionDAG", "//llvm/lib/IR", "//llvm/lib/MC", + "//llvm/lib/Passes", "//llvm/lib/Support", "//llvm/lib/Target", "//llvm/lib/TargetParser", From ddb2e34334ece7c2d90d3affea9111aebeed41bc Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 11 Sep 2025 13:51:00 -0400 Subject: [PATCH 008/734] [lld/mac] Fix comment typos to cycle bots --- lld/MachO/ICF.cpp | 2 +- lld/MachO/ObjC.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index ae0bee8e942cb..7b31378c3781e 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -449,7 +449,7 @@ void ICF::run() { ConcatInputSection *beginIsec = icfInputs[begin]; for (size_t i = begin + 1; i < end; ++i) { - // Skip keepUnique inputs when using safe_thunks (already handeled above) + // Skip keepUnique inputs when using safe_thunks (already handled above) if (useSafeThunks && icfInputs[i]->keepUnique) { // Assert keepUnique sections are either small or replaced with thunks. 
assert(!icfInputs[i]->live || diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 35954b25f7149..ab7f73c3a1df6 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -632,7 +632,7 @@ bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory( tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset); if (!catNameSym) { - // This is an unhandeled case where the category name is not a symbol but + // This is an unhandled case where the category name is not a symbol but // instead points to an CStringInputSection (that doesn't have any symbol) // TODO: Find a small repro and either fix or add a test case for this // scenario From 9d19250610fdaa80600d32fc7f6e06dcefd6bbff Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Thu, 11 Sep 2025 13:56:57 -0400 Subject: [PATCH 009/734] [mlir][vector] Add vector.to_elements unrolling (#157142) This PR adds support for unrolling `vector.to_element`'s source operand. It transforms ```mlir %0:8 = vector.to_elements %v : vector<2x2x2xf32> ``` to ```mlir %v0 = vector.extract %v[0] : vector<2x2xf32> from vector<2x2x2xf32> %v1 = vector.extract %v[1] : vector<2x2xf32> from vector<2x2x2xf32> %0:4 = vector.to_elements %v0 : vector<2x2xf32> %1:4 = vector.to_elements %v1 : vector<2x2xf32> // %0:8 = %0:4 - %1:4 ``` This pattern will be applied until there are only 1-D vectors left. 
--------- Signed-off-by: hanhanW Co-authored-by: hanhanW Co-authored-by: Jakub Kuderski --- .../Vector/TransformOps/VectorTransformOps.td | 11 ++++ .../Vector/Transforms/LoweringPatterns.h | 6 +++ .../mlir/Dialect/Vector/Utils/VectorUtils.h | 6 +++ .../VectorToLLVM/ConvertVectorToLLVMPass.cpp | 1 + .../TransformOps/VectorTransformOps.cpp | 5 ++ .../Dialect/Vector/Transforms/CMakeLists.txt | 1 + .../Transforms/LowerVectorToElements.cpp | 53 +++++++++++++++++++ mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp | 35 ++++++++++++ .../VectorToLLVM/vector-to-llvm.mlir | 42 +++++++++++++++ mlir/test/Dialect/Vector/lit.local.cfg | 2 + .../Dialect/Vector/td/unroll-elements.mlir | 11 ++++ .../Vector/vector-to-elements-lowering.mlir | 26 +++++++++ .../Dialect/Vector/TestVectorTransforms.cpp | 24 +++++++++ .../python/dialects/transform_vector_ext.py | 2 + 14 files changed, 225 insertions(+) create mode 100644 mlir/lib/Dialect/Vector/Transforms/LowerVectorToElements.cpp create mode 100644 mlir/test/Dialect/Vector/lit.local.cfg create mode 100644 mlir/test/Dialect/Vector/td/unroll-elements.mlir create mode 100644 mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir diff --git a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td index 07a4117a37b2c..72a69a056c46e 100644 --- a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td +++ b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td @@ -265,6 +265,17 @@ def ApplyUnrollFromElementsPatternsOp : Op]> { + let description = [{ + Indicates that vector to_elements operations should be unrolled + along the outermost dimension. 
+ }]; + + let assemblyFormat = "attr-dict"; +} + def ApplyLowerScanPatternsOp : Op]> { diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h index 47f96112a9433..f56124cb4fb95 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h @@ -311,6 +311,12 @@ void populateVectorToFromElementsToShuffleTreePatterns( void populateVectorFromElementsLoweringPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); +/// Populate the pattern set with the following patterns: +/// +/// [UnrollToElements] +void populateVectorToElementsLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + /// Populate the pattern set with the following patterns: /// /// [ContractionOpToMatmulOpLowering] diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h index ace26990601c8..97163c4532378 100644 --- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h +++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h @@ -255,6 +255,12 @@ using UnrollVectorOpFn = LogicalResult unrollVectorOp(Operation *op, PatternRewriter &rewriter, UnrollVectorOpFn unrollFn); +/// Generic utility for unrolling values of type vector +/// to N values of type vector using vector.extract. If the input +/// is rank-1 or has leading scalable dimension, failure is returned. 
+FailureOr> unrollVectorValue(TypedValue, + RewriterBase &); + } // namespace vector /// Constructs a permutation map of invariant memref indices to vector diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index 9852df6970fdc..0b44ca7ceee42 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -95,6 +95,7 @@ void ConvertVectorToLLVMPass::runOnOperation() { populateVectorRankReducingFMAPattern(patterns); populateVectorGatherLoweringPatterns(patterns); populateVectorFromElementsLoweringPatterns(patterns); + populateVectorToElementsLoweringPatterns(patterns); if (armI8MM) { if (armNeon) arm_neon::populateLowerContractionToNeonI8MMPatterns(patterns); diff --git a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp index fe066dc04ad55..6bb390aa09d3e 100644 --- a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp +++ b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp @@ -144,6 +144,11 @@ void transform::ApplyUnrollFromElementsPatternsOp::populatePatterns( vector::populateVectorFromElementsLoweringPatterns(patterns); } +void transform::ApplyUnrollToElementsPatternsOp::populatePatterns( + RewritePatternSet &patterns) { + vector::populateVectorToElementsLoweringPatterns(patterns); +} + void transform::ApplyLowerScanPatternsOp::populatePatterns( RewritePatternSet &patterns) { vector::populateVectorScanLoweringPatterns(patterns); diff --git a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt index acbf2b746037b..d74007f13a95b 100644 --- a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRVectorTransforms LowerVectorScan.cpp LowerVectorShapeCast.cpp 
LowerVectorStep.cpp + LowerVectorToElements.cpp LowerVectorToFromElementsToShuffleTree.cpp LowerVectorTransfer.cpp LowerVectorTranspose.cpp diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorToElements.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorToElements.cpp new file mode 100644 index 0000000000000..a53a183ec31bc --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorToElements.cpp @@ -0,0 +1,53 @@ +//===- LowerVectorToElements.cpp - Lower 'vector.to_elements' op ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.to_elements' operation. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" + +#define DEBUG_TYPE "lower-vector-to-elements" + +using namespace mlir; + +namespace { + +struct UnrollToElements final : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ToElementsOp op, + PatternRewriter &rewriter) const override { + + TypedValue source = op.getSource(); + FailureOr> result = + vector::unrollVectorValue(source, rewriter); + if (failed(result)) { + return failure(); + } + SmallVector vectors = *result; + + SmallVector results; + for (const Value &vector : vectors) { + auto subElements = + vector::ToElementsOp::create(rewriter, op.getLoc(), vector); + llvm::append_range(results, subElements.getResults()); + } + rewriter.replaceOp(op, results); + return success(); + } +}; + +} // namespace + +void mlir::vector::populateVectorToElementsLoweringPatterns( + RewritePatternSet &patterns, 
PatternBenefit benefit) { + patterns.add(patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp index 841e1384e03b3..39dc7a4f284a6 100644 --- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp +++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp @@ -393,6 +393,41 @@ vector::isValidMaskedInputVector(ArrayRef shape, return success(); } +/// Takes a 2+ dimensional vector as an input +/// returns n vector values produced by n vector.extract operations. +/// I.e. calling unrollVectorValue([[%v]], rewriter) such that +/// +/// %v : vector +/// +/// will produce the following IR changes +/// +/// %v0 = vector.extract %v[0] : vector from vector +/// %v1 = vector.extract %v[1] : vector from vector +/// ... +/// %vnminusone = vector.extract %v[n-1] : vector from ... +/// +/// and returns SmallVector r = {[[%v0]], [[%v1]], ..., [[%vnminusone]]} +FailureOr> +vector::unrollVectorValue(TypedValue vector, + RewriterBase &rewriter) { + SmallVector subvectors; + VectorType ty = cast(vector.getType()); + Location loc = vector.getLoc(); + if (ty.getRank() < 2) + return rewriter.notifyMatchFailure(loc, "already 1-D"); + + // Unrolling doesn't take vscale into account. Pattern is disabled for + // vectors with leading scalable dim(s). 
+ if (ty.getScalableDims().front()) + return rewriter.notifyMatchFailure(loc, "cannot unroll scalable dim"); + + for (int64_t i = 0, e = ty.getShape().front(); i < e; ++i) { + subvectors.push_back(vector::ExtractOp::create(rewriter, loc, vector, i)); + } + + return subvectors; +} + LogicalResult vector::unrollVectorOp(Operation *op, PatternRewriter &rewriter, vector::UnrollVectorOpFn unrollFn) { assert(op->getNumResults() == 1 && "expected single result"); diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 07d335117de01..2d33888854ea7 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -1774,3 +1774,45 @@ func.func @from_elements_3d(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32) -> v %0 = vector.from_elements %arg0, %arg1, %arg2, %arg3 : vector<2x1x2xf32> return %0 : vector<2x1x2xf32> } + +// ----- + +//===----------------------------------------------------------------------===// +// vector.to_elements +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func @to_elements_1d( +// CHECK-SAME: %[[ARG0:.+]]: vector<2xf32> +// CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[V0:.+]] = llvm.extractelement %[[ARG0]][%[[C0]] : i64] : vector<2xf32> +// CHECK: %[[C1:.+]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[V1:.+]] = llvm.extractelement %[[ARG0]][%[[C1]] : i64] : vector<2xf32> +// CHECK: return %[[V0]], %[[V1]] +func.func @to_elements_1d(%arg0: vector<2xf32>) -> (f32, f32) { + %0:2 = vector.to_elements %arg0 : vector<2xf32> + return %0#0, %0#1 : f32, f32 +} + +// ----- + +// NOTE: We unroll multi-dimensional to_elements ops with pattern +// `UnrollToElements` and then convert the 1-D to_elements ops to llvm. 
+ +// CHECK-LABEL: func @to_elements_2d( +// CHECK-SAME: %[[ARG0:.+]]: vector<2x2xf32> +// CHECK: %[[CAST:.+]] = builtin.unrealized_conversion_cast %[[ARG0]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>> +// CHECK: %[[V0:.+]] = llvm.extractvalue %[[CAST]][0] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[V1:.+]] = llvm.extractvalue %[[CAST]][1] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[R0:.+]] = llvm.extractelement %[[V0]][%[[C0]] : i64] : vector<2xf32> +// CHECK: %[[C1:.+]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[R1:.+]] = llvm.extractelement %[[V0]][%[[C1]] : i64] : vector<2xf32> +// CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[R2:.+]] = llvm.extractelement %[[V1]][%[[C0]] : i64] : vector<2xf32> +// CHECK: %[[C1:.+]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[R3:.+]] = llvm.extractelement %[[V1]][%[[C1]] : i64] : vector<2xf32> +// CHECK: return %[[R0]], %[[R1]], %[[R2]], %[[R3]] +func.func @to_elements_2d(%arg0: vector<2x2xf32>) -> (f32, f32, f32, f32) { + %0:4 = vector.to_elements %arg0 : vector<2x2xf32> + return %0#0, %0#1, %0#2, %0#3 : f32, f32, f32, f32 +} diff --git a/mlir/test/Dialect/Vector/lit.local.cfg b/mlir/test/Dialect/Vector/lit.local.cfg new file mode 100644 index 0000000000000..3e9e8f8497624 --- /dev/null +++ b/mlir/test/Dialect/Vector/lit.local.cfg @@ -0,0 +1,2 @@ +# Skip the directory with input TD sequences. 
+config.excludes = ["td"] diff --git a/mlir/test/Dialect/Vector/td/unroll-elements.mlir b/mlir/test/Dialect/Vector/td/unroll-elements.mlir new file mode 100644 index 0000000000000..40a90a33b0ac4 --- /dev/null +++ b/mlir/test/Dialect/Vector/td/unroll-elements.mlir @@ -0,0 +1,11 @@ +module attributes {transform.with_named_sequence} { + transform.named_sequence @unroll_to_elements(%module_op: !transform.any_op {transform.readonly}) { + %f = transform.structured.match ops{["func.func"]} in %module_op + : (!transform.any_op) -> !transform.any_op + transform.apply_patterns to %f { + transform.apply_patterns.vector.transfer_permutation_patterns + transform.apply_patterns.vector.unroll_to_elements + } : !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir b/mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir new file mode 100644 index 0000000000000..9ec0d76599c41 --- /dev/null +++ b/mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir @@ -0,0 +1,26 @@ +// RUN: mlir-opt %s -test-unroll-vector-to-elements -split-input-file | FileCheck %s +// RUN: mlir-opt %s -transform-preload-library='transform-library-paths=%p/td/unroll-elements.mlir' \ +// RUN: -transform-interpreter=entry-point=unroll_to_elements | FileCheck %s + +// CHECK-LABEL: func.func @to_elements_1d( +// CHECK-SAME: %[[ARG0:.+]]: vector<2xf32> +// CHECK: %[[RES:.+]]:2 = vector.to_elements %[[ARG0]] : vector<2xf32> +// CHECK: return %[[RES]]#0, %[[RES]]#1 +func.func @to_elements_1d(%arg0: vector<2xf32>) -> (f32, f32) { + %0:2 = vector.to_elements %arg0 : vector<2xf32> + return %0#0, %0#1 : f32, f32 +} + +// ----- + +// CHECK-LABEL: func.func @to_elements_2d( +// CHECK-SAME: %[[ARG0:.+]]: vector<2x2xf32> +// CHECK: %[[VEC0:.+]] = vector.extract %[[ARG0]][0] : vector<2xf32> from vector<2x2xf32> +// CHECK: %[[VEC1:.+]] = vector.extract %[[ARG0]][1] : vector<2xf32> from vector<2x2xf32> +// CHECK: %[[RES0:.+]]:2 = vector.to_elements %[[VEC0]] : 
vector<2xf32> +// CHECK: %[[RES1:.+]]:2 = vector.to_elements %[[VEC1]] : vector<2xf32> +// CHECK: return %[[RES0]]#0, %[[RES0]]#1, %[[RES1]]#0, %[[RES1]]#1 +func.func @to_elements_2d(%arg0: vector<2x2xf32>) -> (f32, f32, f32, f32) { + %0:4 = vector.to_elements %arg0 : vector<2x2xf32> + return %0#0, %0#1, %0#2, %0#3 : f32, f32, f32, f32 +} diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index bb1598ee3efe5..d6596cd341df7 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -808,6 +808,28 @@ struct TestUnrollVectorFromElements } }; +struct TestUnrollVectorToElements + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestUnrollVectorToElements) + + StringRef getArgument() const final { + return "test-unroll-vector-to-elements"; + } + StringRef getDescription() const final { + return "Test unrolling patterns for to_elements ops"; + } + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnOperation() override { + RewritePatternSet patterns(&getContext()); + populateVectorToElementsLoweringPatterns(patterns); + (void)applyPatternsGreedily(getOperation(), std::move(patterns)); + } +}; + struct TestFoldArithExtensionIntoVectorContractPatterns : public PassWrapper> { @@ -1083,6 +1105,8 @@ void registerTestVectorLowerings() { PassRegistration(); + PassRegistration(); + PassRegistration(); PassRegistration(); diff --git a/mlir/test/python/dialects/transform_vector_ext.py b/mlir/test/python/dialects/transform_vector_ext.py index 5a648fe073315..28902b012f7cb 100644 --- a/mlir/test/python/dialects/transform_vector_ext.py +++ b/mlir/test/python/dialects/transform_vector_ext.py @@ -48,6 +48,8 @@ def non_configurable_patterns(): vector.ApplyLowerGatherPatternsOp() # CHECK: transform.apply_patterns.vector.unroll_from_elements 
vector.ApplyUnrollFromElementsPatternsOp() + # CHECK: transform.apply_patterns.vector.unroll_to_elements + vector.ApplyUnrollToElementsPatternsOp() # CHECK: transform.apply_patterns.vector.lower_scan vector.ApplyLowerScanPatternsOp() # CHECK: transform.apply_patterns.vector.lower_shape_cast From b812e3d61a9230424cec92e05f073f080f62eed5 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Thu, 11 Sep 2025 13:58:42 -0400 Subject: [PATCH 010/734] [mlir][vector] Add LinearizeVectorToElements (#157740) Co-authored-by: James Newling --- .../Vector/Transforms/VectorLinearize.cpp | 49 ++++++++++++++++++- mlir/test/Dialect/Vector/linearize.mlir | 23 +++++++++ 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp index 7dde6311fa809..12acf4b3f07f5 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp @@ -798,6 +798,51 @@ struct LinearizeVectorFromElements final } }; +/// This pattern linearizes the operand in `vector.to_elements` operations +/// by converting the source type to a 1-D vector while preserving all element +/// values. The transformation creates a linearized `vector.shape_cast` +/// followed by a `vector.to_elements`. 
+/// +/// Example: +/// +/// %0:4 = vector.to_elements %v : vector<2x2xf32> +/// +/// is converted to: +/// +/// %vector_cast = vector.shape_cast %v : vector<2x2xf32> to vector<4xf32> +/// %0:4 = vector.to_elements %vector_cast : vector<4xf32> +/// +struct LinearizeVectorToElements final + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LinearizeVectorToElements(const TypeConverter &typeConverter, + MLIRContext *context, PatternBenefit benefit = 1) + : OpConversionPattern(typeConverter, context, benefit) {} + + LogicalResult + matchAndRewrite(vector::ToElementsOp toElementsOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + VectorType vecType = toElementsOp.getSource().getType(); + if (vecType.getRank() <= 1) + return rewriter.notifyMatchFailure( + toElementsOp, "the rank is already less than or equal to 1"); + + assert(vecType.getNumScalableDims() == 0 && + "to_elements does not support scalable vectors"); + auto vec1DType = + VectorType::get({vecType.getNumElements()}, vecType.getElementType()); + Value shapeCast = vector::ShapeCastOp::create( + rewriter, toElementsOp.getLoc(), vec1DType, toElementsOp.getSource()); + auto newToElementsOp = + vector::ToElementsOp::create(rewriter, toElementsOp.getLoc(), + toElementsOp.getResultTypes(), shapeCast); + rewriter.replaceOp(toElementsOp, newToElementsOp); + return success(); + } +}; + } // namespace /// This method defines the set of operations that are linearizable, and hence @@ -890,8 +935,8 @@ void mlir::vector::populateVectorLinearizeBasePatterns( patterns .add( - typeConverter, patterns.getContext()); + LinearizeVectorStore, LinearizeVectorFromElements, + LinearizeVectorToElements>(typeConverter, patterns.getContext()); } void mlir::vector::populateVectorLinearizeShuffleLikeOpsPatterns( diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir index 5e8bfd0698b33..fe697c8b9c057 100644 --- 
a/mlir/test/Dialect/Vector/linearize.mlir +++ b/mlir/test/Dialect/Vector/linearize.mlir @@ -538,3 +538,26 @@ func.func @test_vector_from_elements(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: %1 = vector.from_elements %arg0, %arg1, %arg2, %arg3 : vector<2x2xf32> return %1 : vector<2x2xf32> } + +// ----- + +// CHECK-LABEL: func.func @to_elements_1d( +// CHECK-SAME: %[[ARG0:.+]]: vector<2xf32> +// CHECK: %[[RES:.+]]:2 = vector.to_elements %[[ARG0]] : vector<2xf32> +// CHECK: return %[[RES]]#0, %[[RES]]#1 +func.func @to_elements_1d(%arg0: vector<2xf32>) -> (f32, f32) { + %0:2 = vector.to_elements %arg0 : vector<2xf32> + return %0#0, %0#1 : f32, f32 +} + +// ----- + +// CHECK-LABEL: func.func @to_elements_2d( +// CHECK-SAME: %[[ARG0:.+]]: vector<2x2xf32> +// CHECK: %[[CAST:.+]] = vector.shape_cast %[[ARG0]] +// CHECK: %[[RES:.+]]:4 = vector.to_elements %[[CAST]] : vector<4xf32> +// CHECK: return %[[RES]]#0, %[[RES]]#1, %[[RES]]#2, %[[RES]]#3 +func.func @to_elements_2d(%arg0: vector<2x2xf32>) -> (f32, f32, f32, f32) { + %0:4 = vector.to_elements %arg0 : vector<2x2xf32> + return %0#0, %0#1, %0#2, %0#3 : f32, f32, f32, f32 +} From f3efbce4a73c595a038a6778a28c307ea987c2a7 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 11 Sep 2025 11:05:29 -0700 Subject: [PATCH 011/734] [llvm] Move data layout string computation to TargetParser (#157612) Clang and other frontends generally need the LLVM data layout string in order to generate LLVM IR modules for LLVM. MLIR clients often need it as well, since MLIR users often lower to LLVM IR. Before this change, the LLVM datalayout string was computed in the LLVM${TGT}CodeGen library in the relevant TargetMachine subclass. However, none of the logic for computing the data layout string requires any details of code generation. Clients who want to avoid duplicating this information were forced to link in LLVMCodeGen and all registered targets, leading to bloated binaries. 
This happened in PR #145899, which measurably increased binary size for some of our users. By moving this information to the TargetParser library, we can delete the duplicate datalayout strings in Clang, and retain the ability to generate IR for unregistered targets. This is intended to be a very mechanical LLVM-only change, but there is an immediately obvious follow-up to clang, which will be prepared separately. The vast majority of data layouts are computable with two inputs: the triple and the "ABI name". There is only one exception, NVPTX, which has a cl::opt to enable short device pointers. I invented a "shortptr" ABI name to pass this option through the target independent interface. Everything else fits. Mips is a bit awkward because it uses a special MipsABIInfo abstraction, which includes members with codegen-like concepts like ABI physical registers that can't live in TargetParser. I think the string logic of looking for "n32" "n64" etc is reasonable to duplicate. We have plenty of other minor duplication to preserve layering. 
--------- Co-authored-by: Matt Arsenault Co-authored-by: Sergei Barannikov --- llvm/benchmarks/RuntimeLibcalls.cpp | 5 +- llvm/include/llvm/IR/DataLayout.h | 2 - llvm/include/llvm/TargetParser/Triple.h | 4 + llvm/lib/IR/DataLayout.cpp | 12 - .../Target/AArch64/AArch64TargetMachine.cpp | 30 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 21 +- llvm/lib/Target/ARC/ARCTargetMachine.cpp | 9 +- llvm/lib/Target/ARM/ARMTargetMachine.cpp | 71 +- llvm/lib/Target/ARM/ARMTargetMachine.h | 3 +- llvm/lib/Target/AVR/AVRTargetMachine.cpp | 7 +- llvm/lib/Target/BPF/BPFTargetMachine.cpp | 10 +- llvm/lib/Target/CSKY/CSKYTargetMachine.cpp | 17 +- .../Target/DirectX/DirectXTargetMachine.cpp | 7 +- .../Target/Hexagon/HexagonTargetMachine.cpp | 12 +- llvm/lib/Target/Lanai/LanaiTargetMachine.cpp | 13 +- .../LoongArch/LoongArchTargetMachine.cpp | 9 +- llvm/lib/Target/M68k/M68kTargetMachine.cpp | 33 +- .../lib/Target/MSP430/MSP430TargetMachine.cpp | 9 +- .../Target/Mips/AsmParser/MipsAsmParser.cpp | 4 +- .../Target/Mips/MCTargetDesc/MipsABIInfo.cpp | 11 +- .../Target/Mips/MCTargetDesc/MipsABIInfo.h | 3 +- .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 2 +- .../Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 3 +- llvm/lib/Target/Mips/MipsTargetMachine.cpp | 46 +- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 26 +- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 56 +- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 40 +- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 26 +- llvm/lib/Target/Sparc/SparcTargetMachine.cpp | 35 +- .../Target/SystemZ/SystemZTargetMachine.cpp | 43 +- llvm/lib/Target/VE/VETargetMachine.cpp | 34 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 16 +- llvm/lib/Target/X86/X86TargetMachine.cpp | 50 +- .../lib/Target/Xtensa/XtensaTargetMachine.cpp | 10 +- llvm/lib/TargetParser/CMakeLists.txt | 1 + llvm/lib/TargetParser/TargetDataLayout.cpp | 629 ++++++++++++++++++ llvm/unittests/IR/DataLayoutTest.cpp | 7 - llvm/unittests/TargetParser/TripleTest.cpp | 9 + 38 files changed, 712 
insertions(+), 613 deletions(-) create mode 100644 llvm/lib/TargetParser/TargetDataLayout.cpp diff --git a/llvm/benchmarks/RuntimeLibcalls.cpp b/llvm/benchmarks/RuntimeLibcalls.cpp index 9ac77bb74a3df..707bdca7ceab7 100644 --- a/llvm/benchmarks/RuntimeLibcalls.cpp +++ b/llvm/benchmarks/RuntimeLibcalls.cpp @@ -54,10 +54,7 @@ static std::vector readSymbolsFromFile(StringRef InputFile) { // Hackily figure out if there's a prefix on the symbol names - llvm-nm // appears to not have a flag to skip this. llvm::Triple HostTriple(LLVM_HOST_TRIPLE); - std::string DummyDatalayout = "e"; - DummyDatalayout += DataLayout::getManglingComponent(HostTriple); - - DataLayout DL(DummyDatalayout); + DataLayout DL(HostTriple.computeDataLayout()); char GlobalPrefix = DL.getGlobalPrefix(); std::vector Lines; diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 2acae246c0b1e..5653ee7b6837d 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -303,8 +303,6 @@ class DataLayout { llvm_unreachable("invalid mangling mode"); } - LLVM_ABI static const char *getManglingComponent(const Triple &T); - /// Returns true if the specified type fits in a native integer type /// supported by the CPU. /// diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index c8fa482a9a4f4..f9b4fc3aa2010 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -1328,6 +1328,10 @@ class Triple { const VersionTuple &Version); LLVM_ABI ExceptionHandling getDefaultExceptionHandling() const; + + /// Compute the LLVM IR data layout string based on the triple. Some targets + /// customize the layout based on the ABIName string. 
+ LLVM_ABI std::string computeDataLayout(StringRef ABIName = "") const; }; } // End llvm namespace diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index ed629d4e5ea22..77f9b997a2ebf 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -172,18 +172,6 @@ struct LessPointerAddrSpace { }; } // namespace -const char *DataLayout::getManglingComponent(const Triple &T) { - if (T.isOSBinFormatGOFF()) - return "-m:l"; - if (T.isOSBinFormatMachO()) - return "-m:o"; - if ((T.isOSWindows() || T.isUEFI()) && T.isOSBinFormatCOFF()) - return T.getArch() == Triple::x86 ? "-m:x" : "-m:w"; - if (T.isOSBinFormatXCOFF()) - return "-m:a"; - return "-m:e"; -} - // Default primitive type specifications. // NOTE: These arrays must be sorted by type bit width. constexpr DataLayout::PrimitiveSpec DefaultIntSpecs[] = { diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 4650b2d0c8151..dde1d88403bfe 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -295,27 +295,6 @@ static std::unique_ptr createTLOF(const Triple &TT) { return std::make_unique(); } -// Helper function to build a DataLayout string -static std::string computeDataLayout(const Triple &TT, - const MCTargetOptions &Options, - bool LittleEndian) { - if (TT.isOSBinFormatMachO()) { - if (TT.getArch() == Triple::aarch64_32) - return "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-" - "n32:64-S128-Fn32"; - return "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-" - "Fn32"; - } - if (TT.isOSBinFormatCOFF()) - return "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:" - "128-n32:64-S128-Fn32"; - std::string Endian = LittleEndian ? "e" : "E"; - std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? 
"-p:32:32" : ""; - return Endian + "-m:e" + Ptr32 + - "-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-" - "n32:64-S128-Fn32"; -} - static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) { if (CPU.empty() && TT.isArm64e()) return "apple-a12"; @@ -368,11 +347,10 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, std::optional CM, CodeGenOptLevel OL, bool JIT, bool LittleEndian) - : CodeGenTargetMachineImpl( - T, computeDataLayout(TT, Options.MCOptions, LittleEndian), TT, - computeDefaultCPU(TT, CPU), FS, Options, - getEffectiveRelocModel(TT, RM), - getEffectiveAArch64CodeModel(TT, CM, JIT), OL), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, + computeDefaultCPU(TT, CPU), FS, Options, + getEffectiveRelocModel(TT, RM), + getEffectiveAArch64CodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian), UseNewSMEABILowering(EnableNewSMEABILowering) { initAsmInfo(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 9afe7590fe4ef..92a587b5771b6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -720,25 +720,6 @@ static MachineSchedRegistry GCNILPSchedRegistry( "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler); -static StringRef computeDataLayout(const Triple &TT) { - if (TT.getArch() == Triple::r600) { - // 32-bit pointers. - return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; - } - - // 32-bit private, local, and region pointers. 64-bit global, constant and - // flat. 
160-bit non-integral fat buffer pointers that include a 128-bit - // buffer descriptor and a 32-bit offset, which are indexed by 32-bit values - // (address space 7), and 128-bit non-integral buffer resourcees (address - // space 8) which cannot be non-trivilally accessed by LLVM memory operations - // like getelementptr. - return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" - "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-" - "v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; -} - LLVM_READNONE static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) { if (!GPU.empty()) @@ -764,7 +745,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, std::optional CM, CodeGenOptLevel OptLevel) : CodeGenTargetMachineImpl( - T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options, + T, TT.computeDataLayout(), TT, getGPUOrDefault(TT, CPU), FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OptLevel), TLOF(createTLOF(getTargetTriple())) { diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp index 370336394ba7f..8e1944062a2c3 100644 --- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp +++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp @@ -33,12 +33,9 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl( - T, - "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-" - "f32:32:32-i64:32-f64:32-a:0:32-n32", - TT, CPU, FS, Options, getRelocModel(RM), - getEffectiveCodeModel(CM, CodeModel::Small), OL), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, + getRelocModel(RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()), Subtarget(TT, std::string(CPU), std::string(FS), *this) { initAsmInfo(); diff --git 
a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index fedf9e2cf34b1..346776e0c4b25 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -121,62 +121,6 @@ static std::unique_ptr createTLOF(const Triple &TT) { return std::make_unique(); } -static std::string computeDataLayout(const Triple &TT, - const TargetOptions &Options, - bool isLittle) { - auto ABI = ARM::computeTargetABI(TT, Options.MCOptions.ABIName); - std::string Ret; - - if (isLittle) - // Little endian. - Ret += "e"; - else - // Big endian. - Ret += "E"; - - Ret += DataLayout::getManglingComponent(TT); - - // Pointers are 32 bits and aligned to 32 bits. - Ret += "-p:32:32"; - - // Function pointers are aligned to 8 bits (because the LSB stores the - // ARM/Thumb state). - Ret += "-Fi8"; - - // ABIs other than APCS have 64 bit integers with natural alignment. - if (ABI != ARM::ARM_ABI_APCS) - Ret += "-i64:64"; - - // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 - // bits, others to 64 bits. We always try to align to 64 bits. - if (ABI == ARM::ARM_ABI_APCS) - Ret += "-f64:32:64"; - - // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others - // to 64. We always ty to give them natural alignment. - if (ABI == ARM::ARM_ABI_APCS) - Ret += "-v64:32:64-v128:32:128"; - else if (ABI != ARM::ARM_ABI_AAPCS16) - Ret += "-v128:64:128"; - - // Try to align aggregates to 32 bits (the default is 64 bits, which has no - // particular hardware support on 32-bit ARM). - Ret += "-a:0:32"; - - // Integer registers are 32 bits. - Ret += "-n32"; - - // The stack is 64 bit aligned on AAPCS and 32 bit aligned everywhere else. 
- if (ABI == ARM::ARM_ABI_AAPCS16) - Ret += "-S128"; - else if (ABI == ARM::ARM_ABI_AAPCS) - Ret += "-S64"; - else - Ret += "-S32"; - - return Ret; -} - static Reloc::Model getEffectiveRelocModel(const Triple &TT, std::optional RM) { if (!RM) @@ -201,12 +145,13 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, const TargetOptions &Options, std::optional RM, std::optional CM, - CodeGenOptLevel OL, bool isLittle) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT, Options, isLittle), TT, - CPU, FS, Options, getEffectiveRelocModel(TT, RM), - getEffectiveCodeModel(CM, CodeModel::Small), OL), + CodeGenOptLevel OL) + : CodeGenTargetMachineImpl( + T, TT.computeDataLayout(Options.MCOptions.ABIName), TT, CPU, FS, + Options, getEffectiveRelocModel(TT, RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), TargetABI(ARM::computeTargetABI(TT, Options.MCOptions.ABIName)), - TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) { + TLOF(createTLOF(getTargetTriple())), isLittle(TT.isLittleEndian()) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) { @@ -334,7 +279,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -342,7 +287,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} namespace { diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h index 1d73af1da6d02..c417c4c8bae65 100644 --- 
a/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -42,8 +42,7 @@ class ARMBaseTargetMachine : public CodeGenTargetMachineImpl { ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, - std::optional CM, CodeGenOptLevel OL, - bool isLittle); + std::optional CM, CodeGenOptLevel OL); ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl(const Function &F) const override; diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp index fbd148478c894..f001d7974669a 100644 --- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -28,9 +28,6 @@ namespace llvm { -static const char *AVRDataLayout = - "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8:16-a:8"; - /// Processes a CPU name. static StringRef getCPU(StringRef CPU) { if (CPU.empty() || CPU == "generic") { @@ -50,8 +47,8 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, AVRDataLayout, TT, getCPU(CPU), FS, Options, - getEffectiveRelocModel(RM), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, getCPU(CPU), FS, + Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), SubTarget(TT, std::string(getCPU(CPU)), std::string(FS), *this) { this->TLOF = std::make_unique(); diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 527a480354571..10b758647c735 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -59,14 +59,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() { initializeBPFMIPreEmitCheckingPass(PR); } -// DataLayout: little or big endian -static std::string computeDataLayout(const Triple &TT) { - if (TT.getArch() == 
Triple::bpfeb) - return "E-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; - else - return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; -} - static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::PIC_); } @@ -77,7 +69,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()), diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp index ae6ef89fdcd07..d0058b9af14be 100644 --- a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp +++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp @@ -33,28 +33,13 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTarget() { initializeCSKYDAGToDAGISelLegacyPass(*Registry); } -static std::string computeDataLayout(const Triple &TT) { - std::string Ret; - - // Only support little endian for now. - // TODO: Add support for big endian. - Ret += "e"; - - // CSKY is always 32-bit target with the CSKYv2 ABI as prefer now. - // It's a 4-byte aligned stack with ELF mangling only. 
- Ret += "-m:e-S32-p:32:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:32" - "-v128:32:32-a:0:32-Fi32-n32"; - - return Ret; -} - CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, RM.value_or(Reloc::Static), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()) { diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index f5d5a73c926e9..bcf84403b2c0d 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -134,11 +134,8 @@ DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl( - T, - "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-" - "f32:32-f64:64-n8:16:32:64", - TT, CPU, FS, Options, Reloc::Static, CodeModel::Small, OL), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, + Reloc::Static, CodeModel::Small, OL), TLOF(std::make_unique()), Subtarget(std::make_unique(TT, CPU, FS, *this)) { initAsmInfo(); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 66508fd767793..0afa04ab57e81 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -231,14 +231,10 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, // Specify the vector alignment explicitly. For v512x1, the calculated // alignment would be 512*alignment(i1), which is 512 bytes, instead of // the required minimum of 64 bytes. 
- : CodeGenTargetMachineImpl( - T, - "e-m:e-p:32:32:32-a:0-n16:32-" - "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-" - "v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048", - TT, CPU, FS, Options, getEffectiveRelocModel(RM), - getEffectiveCodeModel(CM, CodeModel::Small), - (HexagonNoOpt ? CodeGenOptLevel::None : OL)), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveCodeModel(CM, CodeModel::Small), + (HexagonNoOpt ? CodeGenOptLevel::None : OL)), TLOF(std::make_unique()), Subtarget(Triple(TT), CPU, FS, *this) { initAsmInfo(); diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp index 3d6ba9ecc55e2..df56f9ae39fe2 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -37,17 +37,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLanaiTarget() { initializeLanaiMemAluCombinerPass(PR); } -static std::string computeDataLayout() { - // Data layout (keep in sync with clang/lib/Basic/Targets.cpp) - return "E" // Big endian - "-m:e" // ELF name manging - "-p:32:32" // 32-bit pointers, 32 bit aligned - "-i64:64" // 64 bit integers, 64 bit aligned - "-a:0:32" // 32 bit alignment of objects of aggregate type - "-n32" // 32 bit native integer width - "-S64"; // 64 bit natural stack alignment -} - static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::PIC_); } @@ -58,7 +47,7 @@ LanaiTargetMachine::LanaiTargetMachine( std::optional CodeModel, CodeGenOptLevel OptLevel, bool JIT) : CodeGenTargetMachineImpl( - T, computeDataLayout(), TT, Cpu, FeatureString, Options, + T, TT.computeDataLayout(), TT, Cpu, FeatureString, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CodeModel, CodeModel::Medium), OptLevel), Subtarget(TT, Cpu, FeatureString, *this, Options, getCodeModel(), diff --git 
a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index c36db9c75dd3a..d0a8ababe8e58 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -57,13 +57,6 @@ static cl::opt cl::desc("Enable the loop data prefetch pass"), cl::init(false)); -static std::string computeDataLayout(const Triple &TT) { - if (TT.isArch64Bit()) - return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; - assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported"); - return "e-m:e-p:32:32-i64:64-n32-S128"; -} - static Reloc::Model getEffectiveRelocModel(const Triple &TT, std::optional RM) { return RM.value_or(Reloc::Static); @@ -93,7 +86,7 @@ LoongArchTargetMachine::LoongArchTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveLoongArchCodeModel(TT, CM), OL), TLOF(std::make_unique()) { diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp index ce15ee635e21b..847c27bac2cba 100644 --- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp +++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp @@ -46,35 +46,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kTarget() { namespace { -std::string computeDataLayout(const Triple &TT, StringRef CPU, - const TargetOptions &Options) { - std::string Ret = ""; - // M68k is Big Endian - Ret += "E"; - - // FIXME how to wire it with the used object format? - Ret += "-m:e"; - - // M68k pointers are always 32 bit wide even for 16-bit CPUs. - // The ABI only specifies 16-bit alignment. 
- // On at least the 68020+ with a 32-bit bus, there is a performance benefit - // to having 32-bit alignment. - Ret += "-p:32:16:32"; - - // Bytes do not require special alignment, words are word aligned and - // long words are word aligned at minimum. - Ret += "-i8:8:8-i16:16:16-i32:16:32"; - - // FIXME no floats at the moment - - // The registers can hold 8, 16, 32 bits - Ret += "-n8:16:32"; - - Ret += "-a:0:16-S16"; - - return Ret; -} - Reloc::Model getEffectiveRelocModel(const Triple &TT, std::optional RM) { // If not defined we default to static @@ -101,8 +72,8 @@ M68kTargetMachine::M68kTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT, CPU, Options), TT, CPU, - FS, Options, getEffectiveRelocModel(TT, RM), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, + getEffectiveRelocModel(TT, RM), ::getEffectiveCodeModel(CM, JIT), OL), TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp index e6024f4a62185..988bcae120f9f 100644 --- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -34,19 +34,14 @@ static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } -static std::string computeDataLayout(const Triple &TT, StringRef CPU, - const TargetOptions &Options) { - return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16"; -} - MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT, CPU, Options), TT, CPU, - FS, Options, getEffectiveRelocModel(RM), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, 
Options, + getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()), Subtarget(TT, std::string(CPU), std::string(FS), *this) { diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 7b2ee832ae7db..8a5cb517c94c5 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -524,8 +524,8 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, sti, MII), - ABI(MipsABIInfo::computeTargetABI(sti.getTargetTriple(), sti.getCPU(), - Options)) { + ABI(MipsABIInfo::computeTargetABI(sti.getTargetTriple(), + Options.getABIName())) { MCAsmParserExtension::Initialize(parser); parser.addAliasForDirective(".asciiz", ".asciz"); diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp index 1be29cf3c94b9..d7809e27e23f3 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp @@ -57,17 +57,16 @@ unsigned MipsABIInfo::GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const { llvm_unreachable("Unhandled ABI"); } -MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, - const MCTargetOptions &Options) { - if (Options.getABIName().starts_with("o32")) +MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef ABIName) { + if (ABIName.starts_with("o32")) return MipsABIInfo::O32(); - if (Options.getABIName().starts_with("n32")) + if (ABIName.starts_with("n32")) return MipsABIInfo::N32(); - if (Options.getABIName().starts_with("n64")) + if (ABIName.starts_with("n64")) return MipsABIInfo::N64(); if (TT.isABIN32()) return MipsABIInfo::N32(); - assert(Options.getABIName().empty() && "Unknown ABI option for MIPS"); + assert(ABIName.empty() 
&& "Unknown ABI option for MIPS"); if (TT.isMIPS64()) return MipsABIInfo::N64(); diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h index 44b023c7c3ef6..d8003d2fcc164 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h @@ -33,8 +33,7 @@ class MipsABIInfo { static MipsABIInfo O32() { return MipsABIInfo(ABI::O32); } static MipsABIInfo N32() { return MipsABIInfo(ABI::N32); } static MipsABIInfo N64() { return MipsABIInfo(ABI::N64); } - static MipsABIInfo computeTargetABI(const Triple &TT, StringRef CPU, - const MCTargetOptions &Options); + static MipsABIInfo computeTargetABI(const Triple &TT, StringRef ABIName); bool IsKnown() const { return ThisABI != ABI::Unknown; } bool IsO32() const { return ThisABI == ABI::O32; } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 33aab71044b09..74e7baf1db293 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -619,7 +619,7 @@ MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, return new WindowsMipsAsmBackend(T, MRI, STI); MipsABIInfo ABI = MipsABIInfo::computeTargetABI(STI.getTargetTriple(), - STI.getCPU(), Options); + Options.getABIName()); return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), ABI.IsN32()); } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index 8b28ee62b878c..e1c9954c19cc0 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -24,7 +24,8 @@ MipsELFMCAsmInfo::MipsELFMCAsmInfo(const Triple &TheTriple, const MCTargetOptions &Options) { IsLittleEndian = TheTriple.isLittleEndian(); - MipsABIInfo ABI = MipsABIInfo::computeTargetABI(TheTriple, "", Options); + MipsABIInfo ABI = + 
MipsABIInfo::computeTargetABI(TheTriple, Options.getABIName()); if (TheTriple.isMIPS64() && !ABI.IsN32()) CodePointerSize = CalleeSaveStackSlotSize = 8; diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 8c519fa379dd8..03bedc5b15c4f 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -77,42 +77,6 @@ static std::unique_ptr createTLOF(const Triple &TT) { return std::make_unique(); } -static std::string computeDataLayout(const Triple &TT, StringRef CPU, - const TargetOptions &Options, - bool isLittle) { - std::string Ret; - MipsABIInfo ABI = MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions); - - // There are both little and big endian mips. - if (isLittle) - Ret += "e"; - else - Ret += "E"; - - if (ABI.IsO32()) - Ret += "-m:m"; - else - Ret += "-m:e"; - - // Pointers are 32 bit on some ABIs. - if (!ABI.IsN64()) - Ret += "-p:32:32"; - - // 8 and 16 bit integers only need to have natural alignment, but try to - // align them to 32 bits. 64 bit integers have natural alignment. - Ret += "-i8:8:32-i16:16:32-i64:64"; - - // 32 bit registers are always available and the stack is at least 64 bit - // aligned. On N64 64 bit registers are also available and the stack is - // 128 bit aligned. 
- if (ABI.IsN64() || ABI.IsN32()) - Ret += "-i128:128-n32:64-S128"; - else - Ret += "-n32-S64"; - - return Ret; -} - static Reloc::Model getEffectiveRelocModel(bool JIT, std::optional RM) { if (!RM || JIT) @@ -132,12 +96,12 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT, std::optional CM, CodeGenOptLevel OL, bool JIT, bool isLittle) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT, CPU, Options, isLittle), - TT, CPU, FS, Options, - getEffectiveRelocModel(JIT, RM), - getEffectiveCodeModel(CM, CodeModel::Small), OL), + : CodeGenTargetMachineImpl( + T, TT.computeDataLayout(Options.MCOptions.getABIName()), TT, CPU, FS, + Options, getEffectiveRelocModel(JIT, RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), isLittle(isLittle), TLOF(createTLOF(getTargetTriple())), - ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)), + ABI(MipsABIInfo::computeTargetABI(TT, Options.MCOptions.getABIName())), Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this, std::nullopt), NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16", diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 833f014a4c870..a6837a482608c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -118,24 +118,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() { initializeNVPTXPrologEpilogPassPass(PR); } -static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) { - std::string Ret = "e"; - - // Tensor Memory (addrspace:6) is always 32-bits. - // Distributed Shared Memory (addrspace:7) follows shared memory - // (addrspace:3). 
- if (!is64Bit) - Ret += "-p:32:32-p6:32:32-p7:32:32"; - else if (UseShortPointers) - Ret += "-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32"; - else - Ret += "-p6:32:32"; - - Ret += "-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64"; - - return Ret; -} - NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -144,10 +126,10 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, CodeGenOptLevel OL, bool is64bit) // The pic relocation model is used regardless of what the client has // specified, as it is the only relocation model currently supported. - : CodeGenTargetMachineImpl(T, - computeDataLayout(is64bit, UseShortPointersOpt), - TT, CPU, FS, Options, Reloc::PIC_, - getEffectiveCodeModel(CM, CodeModel::Small), OL), + : CodeGenTargetMachineImpl( + T, TT.computeDataLayout(UseShortPointersOpt ? "shortptr" : ""), TT, + CPU, FS, Options, Reloc::PIC_, + getEffectiveCodeModel(CM, CodeModel::Small), OL), is64bit(is64bit), TLOF(std::make_unique()), Subtarget(TT, std::string(CPU), std::string(FS), *this), StrPool(StrAlloc) { diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index ae92d5eab20cd..000d29610678f 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -149,58 +149,6 @@ LLVMInitializePowerPCTarget() { initializePPCAIXAsmPrinterPass(PR); } -static bool isLittleEndianTriple(const Triple &T) { - return T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle; -} - -/// Return the datalayout string of a subtarget. -static std::string getDataLayoutString(const Triple &T) { - bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; - std::string Ret; - - // Most PPC* platforms are big endian, PPC(64)LE is little endian. 
- if (isLittleEndianTriple(T)) - Ret = "e"; - else - Ret = "E"; - - Ret += DataLayout::getManglingComponent(T); - - // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit - // pointers. - if (!is64Bit || T.getOS() == Triple::Lv2) - Ret += "-p:32:32"; - - // If the target ABI uses function descriptors, then the alignment of function - // pointers depends on the alignment used to emit the descriptor. Otherwise, - // function pointers are aligned to 32 bits because the instructions must be. - if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) { - Ret += "-Fi64"; - } else if (T.isOSAIX()) { - Ret += is64Bit ? "-Fi64" : "-Fi32"; - } else { - Ret += "-Fn32"; - } - - // Note, the alignment values for f64 and i64 on ppc64 in Darwin - // documentation are wrong; these are correct (i.e. "what gcc does"). - Ret += "-i64:64"; - - // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. - if (is64Bit) - Ret += "-i128:128-n32:64"; - else - Ret += "-n32"; - - // Specify the vector alignment explicitly. For v256i1 and v512i1, the - // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), - // which is 256 and 512 bytes - way over aligned. - if (is64Bit && (T.isOSAIX() || T.isOSLinux())) - Ret += "-S128-v256:256:256-v512:512:512"; - - return Ret; -} - static std::string computeFSAdditions(StringRef FS, CodeGenOptLevel OL, const Triple &TT) { std::string FullFS = std::string(FS); @@ -348,13 +296,13 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, getDataLayoutString(TT), TT, CPU, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, computeFSAdditions(FS, OL, TT), Options, getEffectiveRelocModel(TT, RM), getEffectivePPCCodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), TargetABI(computeTargetABI(TT, Options)), - Endianness(isLittleEndianTriple(TT) ? 
Endian::LITTLE : Endian::BIG) { + Endianness(TT.isLittleEndian() ? Endian::LITTLE : Endian::BIG) { initAsmInfo(); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 460bb33f2553a..a1ec24f1fe719 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -141,39 +141,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVAsmPrinterPass(*PR); } -static std::string computeDataLayout(const Triple &TT, - const TargetOptions &Opts) { - std::string Ret; - - if (TT.isLittleEndian()) - Ret += "e"; - else - Ret += "E"; - - Ret += "-m:e"; - - // Pointer and integer sizes. - if (TT.isArch64Bit()) { - Ret += "-p:64:64-i64:64-i128:128"; - Ret += "-n32:64"; - } else { - assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); - Ret += "-p:32:32-i64:64"; - Ret += "-n32"; - } - - // Stack alignment based on ABI. - StringRef ABI = Opts.MCOptions.getABIName(); - if (ABI == "ilp32e") - Ret += "-S32"; - else if (ABI == "lp64e") - Ret += "-S64"; - else - Ret += "-S128"; - - return Ret; -} - static Reloc::Model getEffectiveRelocModel(const Triple &TT, std::optional RM) { return RM.value_or(Reloc::Static); @@ -185,9 +152,10 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT, Options), TT, CPU, FS, - Options, getEffectiveRelocModel(TT, RM), - getEffectiveCodeModel(CM, CodeModel::Small), OL), + : CodeGenTargetMachineImpl( + T, TT.computeDataLayout(Options.MCOptions.getABIName()), TT, CPU, FS, + Options, getEffectiveRelocModel(TT, RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()) { initAsmInfo(); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 0eac43d8469df..9f6f9c7225357 100644 --- 
a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -60,30 +60,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTarget() { initializeSPIRVStripConvergentIntrinsicsPass(PR); } -static std::string computeDataLayout(const Triple &TT) { - const auto Arch = TT.getArch(); - // TODO: this probably needs to be revisited: - // Logical SPIR-V has no pointer size, so any fixed pointer size would be - // wrong. The choice to default to 32 or 64 is just motivated by another - // memory model used for graphics: PhysicalStorageBuffer64. But it shouldn't - // mean anything. - if (Arch == Triple::spirv32) - return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-" - "v256:256-v512:512-v1024:1024-n8:16:32:64-G1"; - if (Arch == Triple::spirv) - return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" - "v512:512-v1024:1024-n8:16:32:64-G10"; - if (TT.getVendor() == Triple::VendorType::AMD && - TT.getOS() == Triple::OSType::AMDHSA) - return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" - "v512:512-v1024:1024-n32:64-S32-G1-P4-A0"; - if (TT.getVendor() == Triple::VendorType::Intel) - return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" - "v512:512-v1024:1024-n8:16:32:64-G1-P9-A0"; - return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" - "v512:512-v1024:1024-n8:16:32:64-G1"; -} - static Reloc::Model getEffectiveRelocModel(std::optional RM) { if (!RM) return Reloc::PIC_; @@ -99,7 +75,7 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()), diff --git 
a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp index 754c8f63ca4ec..27ab57c11cf71 100644 --- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -38,39 +38,6 @@ static cl::opt BranchRelaxation("sparc-enable-branch-relax", cl::Hidden, cl::init(true), cl::desc("Relax out of range conditional branches")); -static std::string computeDataLayout(const Triple &T) { - const bool is64Bit = T.isSPARC64(); - - // Sparc is typically big endian, but some are little. - std::string Ret = T.getArch() == Triple::sparcel ? "e" : "E"; - Ret += "-m:e"; - - // Some ABIs have 32bit pointers. - if (!is64Bit) - Ret += "-p:32:32"; - - // Alignments for 64 bit integers. - Ret += "-i64:64"; - - // Alignments for 128 bit integers. - // This is not specified in the ABI document but is the de facto standard. - Ret += "-i128:128"; - - // On SparcV9 128 floats are aligned to 128 bits, on others only to 64. - // On SparcV9 registers can hold 64 or 32 bits, on others only 32. 
- if (is64Bit) - Ret += "-n32:64"; - else - Ret += "-f128:64-n32"; - - if (is64Bit) - Ret += "-S128"; - else - Ret += "-S64"; - - return Ret; -} - static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } @@ -111,7 +78,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const Triple &TT, std::optional CM, CodeGenOptLevel OL, bool JIT) : CodeGenTargetMachineImpl( - T, computeDataLayout(TT), TT, CPU, FS, Options, + T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveSparcCodeModel(CM, getEffectiveRelocModel(RM), TT.isSPARC64(), JIT), diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index ece8928accd0c..3d0c04b574933 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -54,47 +54,6 @@ LLVMInitializeSystemZTarget() { initializeSystemZCopyPhysRegsPass(PR); } -static std::string computeDataLayout(const Triple &TT) { - std::string Ret; - - // Big endian. - Ret += "E"; - - // Data mangling. - Ret += DataLayout::getManglingComponent(TT); - - // Special features for z/OS. - if (TT.isOSzOS()) { - if (TT.isArch64Bit()) { - // Custom address space for ptr32. - Ret += "-p1:32:32"; - } - } - - // Make sure that global data has at least 16 bits of alignment by - // default, so that we can refer to it using LARL. We don't have any - // special requirements for stack variables though. - Ret += "-i1:8:16-i8:8:16"; - - // 64-bit integers are naturally aligned. - Ret += "-i64:64"; - - // 128-bit floats are aligned only to 64 bits. - Ret += "-f128:64"; - - // The DataLayout string always holds a vector alignment of 64 bits, see - // comment in clang/lib/Basic/Targets/SystemZ.h. - Ret += "-v128:64"; - - // We prefer 16 bits of aligned for all globals; see above. - Ret += "-a:8:16"; - - // Integer registers are 32 or 64 bits. 
- Ret += "-n32:64"; - - return Ret; -} - static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSzOS()) return std::make_unique(); @@ -163,7 +122,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, std::optional CM, CodeGenOptLevel OL, bool JIT) : CodeGenTargetMachineImpl( - T, computeDataLayout(TT), TT, CPU, FS, Options, + T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL), diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp index 14b8e330d87a4..dc9ca48cc221b 100644 --- a/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/llvm/lib/Target/VE/VETargetMachine.cpp @@ -35,38 +35,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETarget() { initializeVEDAGToDAGISelLegacyPass(PR); } -static std::string computeDataLayout(const Triple &T) { - // Aurora VE is little endian - std::string Ret = "e"; - - // Use ELF mangling - Ret += "-m:e"; - - // Alignments for 64 bit integers. - Ret += "-i64:64"; - - // VE supports 32 bit and 64 bits integer on registers - Ret += "-n32:64"; - - // Stack alignment is 128 bits - Ret += "-S128"; - - // Vector alignments are 64 bits - // Need to define all of them. Otherwise, each alignment becomes - // the size of each data by default. 
- Ret += "-v64:64:64"; // for v2f32 - Ret += "-v128:64:64"; - Ret += "-v256:64:64"; - Ret += "-v512:64:64"; - Ret += "-v1024:64:64"; - Ret += "-v2048:64:64"; - Ret += "-v4096:64:64"; - Ret += "-v8192:64:64"; - Ret += "-v16384:64:64"; // for v256f64 - - return Ret; -} - static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } @@ -91,7 +59,7 @@ VETargetMachine::VETargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(createTLOF()), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 6827ee6527947..a9c638cde1259 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -196,19 +196,9 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl( - T, - TT.isArch64Bit() - ? (TT.isOSEmscripten() ? "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-" - "i128:128-f128:64-n32:64-S128-ni:1:10:20" - : "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-" - "i128:128-n32:64-S128-ni:1:10:20") - : (TT.isOSEmscripten() ? 
"e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-" - "i128:128-f128:64-n32:64-S128-ni:1:10:20" - : "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-" - "i128:128-n32:64-S128-ni:1:10:20"), - TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT), - getEffectiveCodeModel(CM, CodeModel::Large), OL), + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, + getEffectiveRelocModel(RM, TT), + getEffectiveCodeModel(CM, CodeModel::Large), OL), TLOF(new WebAssemblyTargetObjectFile()), UsesMultivalueABI(Options.MCOptions.getABIName() == "experimental-mv") { // WebAssembly type-checks instructions, but a noreturn function with a return diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 6d9c6cdedd9e5..babbe95cc7808 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -125,54 +125,6 @@ static std::unique_ptr createTLOF(const Triple &TT) { return std::make_unique(); } -static std::string computeDataLayout(const Triple &TT) { - // X86 is little endian - std::string Ret = "e"; - - Ret += DataLayout::getManglingComponent(TT); - // X86 and x32 have 32 bit pointers. - if (!TT.isArch64Bit() || TT.isX32()) - Ret += "-p:32:32"; - - // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers. - Ret += "-p270:32:32-p271:32:32-p272:64:64"; - - // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. - // 128 bit integers are not specified in the 32-bit ABIs but are used - // internally for lowering f128, so we match the alignment to that. - if (TT.isArch64Bit() || TT.isOSWindows()) - Ret += "-i64:64-i128:128"; - else if (TT.isOSIAMCU()) - Ret += "-i64:32-f64:32"; - else - Ret += "-i128:128-f64:32:64"; - - // Some ABIs align long double to 128 bits, others to 32. 
- if (TT.isOSIAMCU()) - ; // No f80 - else if (TT.isArch64Bit() || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment()) - Ret += "-f80:128"; - else - Ret += "-f80:32"; - - if (TT.isOSIAMCU()) - Ret += "-f128:32"; - - // The registers can hold 8, 16, 32 or, in x86-64, 64 bits. - if (TT.isArch64Bit()) - Ret += "-n8:16:32:64"; - else - Ret += "-n8:16:32"; - - // The stack is aligned to 32 bits on some ABIs and 128 bits on others. - if ((!TT.isArch64Bit() && TT.isOSWindows()) || TT.isOSIAMCU()) - Ret += "-a:0:32-S32"; - else - Ret += "-S128"; - - return Ret; -} - static Reloc::Model getEffectiveRelocModel(const Triple &TT, bool JIT, std::optional RM) { bool is64Bit = TT.getArch() == Triple::x86_64; @@ -236,7 +188,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(TT, JIT, RM), getEffectiveX86CodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) { diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp index c9f1ca8b46dab..72cb61b5e864e 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp @@ -32,13 +32,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXtensaTarget() { initializeXtensaAsmPrinterPass(PR); } -static std::string computeDataLayout(const Triple &TT, StringRef CPU, - const TargetOptions &Options, - bool IsLittle) { - std::string Ret = "e-m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32"; - return Ret; -} - static Reloc::Model getEffectiveRelocModel(bool JIT, std::optional RM) { if (!RM || JIT) @@ -53,8 +46,7 @@ XtensaTargetMachine::XtensaTargetMachine(const Target &T, const Triple &TT, std::optional CM, CodeGenOptLevel OL, bool JIT, bool IsLittle) - : 
CodeGenTargetMachineImpl(T, computeDataLayout(TT, CPU, Options, IsLittle), - TT, CPU, FS, Options, + : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options, getEffectiveRelocModel(JIT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()) { diff --git a/llvm/lib/TargetParser/CMakeLists.txt b/llvm/lib/TargetParser/CMakeLists.txt index 5eecfbf80b2f7..e1a30199e1ade 100644 --- a/llvm/lib/TargetParser/CMakeLists.txt +++ b/llvm/lib/TargetParser/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_component_library(LLVMTargetParser SubtargetFeature.cpp TargetParser.cpp Triple.cpp + TargetDataLayout.cpp X86TargetParser.cpp XtensaTargetParser.cpp diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp new file mode 100644 index 0000000000000..e222588ea389b --- /dev/null +++ b/llvm/lib/TargetParser/TargetDataLayout.cpp @@ -0,0 +1,629 @@ +//===--- TargetDataLayout.cpp - Map Triple to LLVM data layout string -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/TargetParser/ARMTargetParser.h" +#include "llvm/TargetParser/Triple.h" +#include +using namespace llvm; + +static StringRef getManglingComponent(const Triple &T) { + if (T.isOSBinFormatGOFF()) + return "-m:l"; + if (T.isOSBinFormatMachO()) + return "-m:o"; + if ((T.isOSWindows() || T.isUEFI()) && T.isOSBinFormatCOFF()) + return T.getArch() == Triple::x86 ? 
"-m:x" : "-m:w"; + if (T.isOSBinFormatXCOFF()) + return "-m:a"; + return "-m:e"; +} + +static std::string computeARMDataLayout(const Triple &TT, StringRef ABIName) { + auto ABI = ARM::computeTargetABI(TT, ABIName); + std::string Ret; + + if (TT.isLittleEndian()) + // Little endian. + Ret += "e"; + else + // Big endian. + Ret += "E"; + + Ret += getManglingComponent(TT); + + // Pointers are 32 bits and aligned to 32 bits. + Ret += "-p:32:32"; + + // Function pointers are aligned to 8 bits (because the LSB stores the + // ARM/Thumb state). + Ret += "-Fi8"; + + // ABIs other than APCS have 64 bit integers with natural alignment. + if (ABI != ARM::ARM_ABI_APCS) + Ret += "-i64:64"; + + // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 + // bits, others to 64 bits. We always try to align to 64 bits. + if (ABI == ARM::ARM_ABI_APCS) + Ret += "-f64:32:64"; + + // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others + // to 64. We always ty to give them natural alignment. + if (ABI == ARM::ARM_ABI_APCS) + Ret += "-v64:32:64-v128:32:128"; + else if (ABI != ARM::ARM_ABI_AAPCS16) + Ret += "-v128:64:128"; + + // Try to align aggregates to 32 bits (the default is 64 bits, which has no + // particular hardware support on 32-bit ARM). + Ret += "-a:0:32"; + + // Integer registers are 32 bits. + Ret += "-n32"; + + // The stack is 64 bit aligned on AAPCS and 32 bit aligned everywhere else. 
+ if (ABI == ARM::ARM_ABI_AAPCS16) + Ret += "-S128"; + else if (ABI == ARM::ARM_ABI_AAPCS) + Ret += "-S64"; + else + Ret += "-S32"; + + return Ret; +} + +// Helper function to build a DataLayout string +static std::string computeAArch64DataLayout(const Triple &TT) { + if (TT.isOSBinFormatMachO()) { + if (TT.getArch() == Triple::aarch64_32) + return "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-" + "n32:64-S128-Fn32"; + return "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-" + "Fn32"; + } + if (TT.isOSBinFormatCOFF()) + return "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:" + "128-n32:64-S128-Fn32"; + std::string Endian = TT.isLittleEndian() ? "e" : "E"; + std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : ""; + return Endian + "-m:e" + Ptr32 + + "-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-" + "n32:64-S128-Fn32"; +} + +// DataLayout: little or big endian +static std::string computeBPFDataLayout(const Triple &TT) { + if (TT.getArch() == Triple::bpfeb) + return "E-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; + else + return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; +} + +static std::string computeCSKYDataLayout(const Triple &TT) { + // CSKY is always 32-bit target with the CSKYv2 ABI as prefer now. + // It's a 4-byte aligned stack with ELF mangling only. + // Only support little endian for now. + // TODO: Add support for big endian. 
+ return "e-m:e-S32-p:32:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:32" + "-v128:32:32-a:0:32-Fi32-n32"; +} + +static std::string computeLoongArchDataLayout(const Triple &TT) { + if (TT.isLoongArch64()) + return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; + assert(TT.isLoongArch32() && "only LA32 and LA64 are currently supported"); + return "e-m:e-p:32:32-i64:64-n32-S128"; +} + +static std::string computeM68kDataLayout(const Triple &TT) { + std::string Ret = ""; + // M68k is Big Endian + Ret += "E"; + + // FIXME how to wire it with the used object format? + Ret += "-m:e"; + + // M68k pointers are always 32 bit wide even for 16-bit CPUs. + // The ABI only specifies 16-bit alignment. + // On at least the 68020+ with a 32-bit bus, there is a performance benefit + // to having 32-bit alignment. + Ret += "-p:32:16:32"; + + // Bytes do not require special alignment, words are word aligned and + // long words are word aligned at minimum. + Ret += "-i8:8:8-i16:16:16-i32:16:32"; + + // FIXME no floats at the moment + + // The registers can hold 8, 16, 32 bits + Ret += "-n8:16:32"; + + Ret += "-a:0:16-S16"; + + return Ret; +} + +namespace { +enum class MipsABI { Unknown, O32, N32, N64 }; +} + +// FIXME: This duplicates MipsABIInfo::computeTargetABI, but duplicating this is +// preferable to violating layering rules. Ideally that information should live +// in LLVM TargetParser, but for now we just duplicate some ABI name string +// logic for simplicity. 
+static MipsABI getMipsABI(const Triple &TT, StringRef ABIName) { + if (ABIName.starts_with("o32")) + return MipsABI::O32; + if (ABIName.starts_with("n32")) + return MipsABI::N32; + if (ABIName.starts_with("n64")) + return MipsABI::N64; + if (TT.isABIN32()) + return MipsABI::N32; + assert(ABIName.empty() && "Unknown ABI option for MIPS"); + + if (TT.isMIPS64()) + return MipsABI::N64; + return MipsABI::O32; +} + +static std::string computeMipsDataLayout(const Triple &TT, StringRef ABIName) { + std::string Ret; + MipsABI ABI = getMipsABI(TT, ABIName); + + // There are both little and big endian mips. + if (TT.isLittleEndian()) + Ret += "e"; + else + Ret += "E"; + + if (ABI == MipsABI::O32) + Ret += "-m:m"; + else + Ret += "-m:e"; + + // Pointers are 32 bit on some ABIs. + if (ABI != MipsABI::N64) + Ret += "-p:32:32"; + + // 8 and 16 bit integers only need to have natural alignment, but try to + // align them to 32 bits. 64 bit integers have natural alignment. + Ret += "-i8:8:32-i16:16:32-i64:64"; + + // 32 bit registers are always available and the stack is at least 64 bit + // aligned. On N64 64 bit registers are also available and the stack is + // 128 bit aligned. + if (ABI == MipsABI::N64 || ABI == MipsABI::N32) + Ret += "-i128:128-n32:64-S128"; + else + Ret += "-n32-S64"; + + return Ret; +} + +static std::string computePowerDataLayout(const Triple &T) { + bool is64Bit = T.isPPC64(); + std::string Ret; + + // Most PPC* platforms are big endian, PPC(64)LE is little endian. + if (T.isLittleEndian()) + Ret = "e"; + else + Ret = "E"; + + Ret += getManglingComponent(T); + + // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit + // pointers. + if (!is64Bit || T.getOS() == Triple::Lv2) + Ret += "-p:32:32"; + + // If the target ABI uses function descriptors, then the alignment of function + // pointers depends on the alignment used to emit the descriptor. Otherwise, + // function pointers are aligned to 32 bits because the instructions must be. 
+ if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) { + Ret += "-Fi64"; + } else if (T.isOSAIX()) { + Ret += is64Bit ? "-Fi64" : "-Fi32"; + } else { + Ret += "-Fn32"; + } + + // Note, the alignment values for f64 and i64 on ppc64 in Darwin + // documentation are wrong; these are correct (i.e. "what gcc does"). + Ret += "-i64:64"; + + // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. + if (is64Bit) + Ret += "-i128:128-n32:64"; + else + Ret += "-n32"; + + // Specify the vector alignment explicitly. For v256i1 and v512i1, the + // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), + // which is 256 and 512 bytes - way over aligned. + if (is64Bit && (T.isOSAIX() || T.isOSLinux())) + Ret += "-S128-v256:256:256-v512:512:512"; + + return Ret; +} + +static std::string computeAMDDataLayout(const Triple &TT) { + if (TT.getArch() == Triple::r600) { + // 32-bit pointers. + return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; + } + + // 32-bit private, local, and region pointers. 64-bit global, constant and + // flat. 160-bit non-integral fat buffer pointers that include a 128-bit + // buffer descriptor and a 32-bit offset, which are indexed by 32-bit values + // (address space 7), and 128-bit non-integral buffer resourcees (address + // space 8) which cannot be non-trivilally accessed by LLVM memory operations + // like getelementptr. + return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" + "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-" + "v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; +} + +static std::string computeRISCVDataLayout(const Triple &TT, StringRef ABIName) { + std::string Ret; + + if (TT.isLittleEndian()) + Ret += "e"; + else + Ret += "E"; + + Ret += "-m:e"; + + // Pointer and integer sizes. 
+ if (TT.isRISCV64()) { + Ret += "-p:64:64-i64:64-i128:128"; + Ret += "-n32:64"; + } else { + assert(TT.isRISCV32() && "only RV32 and RV64 are currently supported"); + Ret += "-p:32:32-i64:64"; + Ret += "-n32"; + } + + // Stack alignment based on ABI. + StringRef ABI = ABIName; + if (ABI == "ilp32e") + Ret += "-S32"; + else if (ABI == "lp64e") + Ret += "-S64"; + else + Ret += "-S128"; + + return Ret; +} + +static std::string computeSparcDataLayout(const Triple &T) { + const bool Is64Bit = T.isSPARC64(); + + // Sparc is typically big endian, but some are little. + std::string Ret = T.getArch() == Triple::sparcel ? "e" : "E"; + Ret += "-m:e"; + + // Some ABIs have 32bit pointers. + if (!Is64Bit) + Ret += "-p:32:32"; + + // Alignments for 64 bit integers. + Ret += "-i64:64"; + + // Alignments for 128 bit integers. + // This is not specified in the ABI document but is the de facto standard. + Ret += "-i128:128"; + + // On SparcV9 128 floats are aligned to 128 bits, on others only to 64. + // On SparcV9 registers can hold 64 or 32 bits, on others only 32. + if (Is64Bit) + Ret += "-n32:64"; + else + Ret += "-f128:64-n32"; + + if (Is64Bit) + Ret += "-S128"; + else + Ret += "-S64"; + + return Ret; +} + +static std::string computeSystemZDataLayout(const Triple &TT) { + std::string Ret; + + // Big endian. + Ret += "E"; + + // Data mangling. + Ret += getManglingComponent(TT); + + // Special features for z/OS. + if (TT.isOSzOS()) { + // Custom address space for ptr32. + Ret += "-p1:32:32"; + } + + // Make sure that global data has at least 16 bits of alignment by + // default, so that we can refer to it using LARL. We don't have any + // special requirements for stack variables though. + Ret += "-i1:8:16-i8:8:16"; + + // 64-bit integers are naturally aligned. + Ret += "-i64:64"; + + // 128-bit floats are aligned only to 64 bits. + Ret += "-f128:64"; + + // The DataLayout string always holds a vector alignment of 64 bits, see + // comment in clang/lib/Basic/Targets/SystemZ.h. 
+ Ret += "-v128:64"; + + // We prefer 16 bits of aligned for all globals; see above. + Ret += "-a:8:16"; + + // Integer registers are 32 or 64 bits. + Ret += "-n32:64"; + + return Ret; +} + +static std::string computeX86DataLayout(const Triple &TT) { + bool Is64Bit = TT.getArch() == Triple::x86_64; + + // X86 is little endian + std::string Ret = "e"; + + Ret += getManglingComponent(TT); + // X86 and x32 have 32 bit pointers. + if (!Is64Bit || TT.isX32()) + Ret += "-p:32:32"; + + // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers. + Ret += "-p270:32:32-p271:32:32-p272:64:64"; + + // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. + // 128 bit integers are not specified in the 32-bit ABIs but are used + // internally for lowering f128, so we match the alignment to that. + if (Is64Bit || TT.isOSWindows()) + Ret += "-i64:64-i128:128"; + else if (TT.isOSIAMCU()) + Ret += "-i64:32-f64:32"; + else + Ret += "-i128:128-f64:32:64"; + + // Some ABIs align long double to 128 bits, others to 32. + if (TT.isOSIAMCU()) + ; // No f80 + else if (Is64Bit || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment()) + Ret += "-f80:128"; + else + Ret += "-f80:32"; + + if (TT.isOSIAMCU()) + Ret += "-f128:32"; + + // The registers can hold 8, 16, 32 or, in x86-64, 64 bits. + if (Is64Bit) + Ret += "-n8:16:32:64"; + else + Ret += "-n8:16:32"; + + // The stack is aligned to 32 bits on some ABIs and 128 bits on others. + if ((!Is64Bit && TT.isOSWindows()) || TT.isOSIAMCU()) + Ret += "-a:0:32-S32"; + else + Ret += "-S128"; + + return Ret; +} + +static std::string computeNVPTXDataLayout(const Triple &T, StringRef ABIName) { + bool Is64Bit = T.getArch() == Triple::nvptx64; + std::string Ret = "e"; + + // Tensor Memory (addrspace:6) is always 32-bits. + // Distributed Shared Memory (addrspace:7) follows shared memory + // (addrspace:3). 
+ if (!Is64Bit) + Ret += "-p:32:32-p6:32:32-p7:32:32"; + else if (ABIName == "shortptr") + Ret += "-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32"; + else + Ret += "-p6:32:32"; + + Ret += "-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64"; + + return Ret; +} + +static std::string computeSPIRVDataLayout(const Triple &TT) { + const auto Arch = TT.getArch(); + // TODO: this probably needs to be revisited: + // Logical SPIR-V has no pointer size, so any fixed pointer size would be + // wrong. The choice to default to 32 or 64 is just motivated by another + // memory model used for graphics: PhysicalStorageBuffer64. But it shouldn't + // mean anything. + if (Arch == Triple::spirv32) + return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-" + "v256:256-v512:512-v1024:1024-n8:16:32:64-G1"; + if (Arch == Triple::spirv) + return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" + "v512:512-v1024:1024-n8:16:32:64-G10"; + if (TT.getVendor() == Triple::VendorType::AMD && + TT.getOS() == Triple::OSType::AMDHSA) + return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" + "v512:512-v1024:1024-n32:64-S32-G1-P4-A0"; + if (TT.getVendor() == Triple::VendorType::Intel) + return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" + "v512:512-v1024:1024-n8:16:32:64-G1-P9-A0"; + return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-" + "v512:512-v1024:1024-n8:16:32:64-G1"; +} + +static std::string computeLanaiDataLayout() { + // Data layout (keep in sync with clang/lib/Basic/Targets.cpp) + return "E" // Big endian + "-m:e" // ELF name manging + "-p:32:32" // 32-bit pointers, 32 bit aligned + "-i64:64" // 64 bit integers, 64 bit aligned + "-a:0:32" // 32 bit alignment of objects of aggregate type + "-n32" // 32 bit native integer width + "-S64"; // 64 bit natural stack alignment +} + +static std::string computeWebAssemblyDataLayout(const Triple &TT) { + return TT.getArch() == Triple::wasm64 + ? 
(TT.isOSEmscripten() ? "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-" + "i128:128-f128:64-n32:64-S128-ni:1:10:20" + : "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-" + "i128:128-n32:64-S128-ni:1:10:20") + : (TT.isOSEmscripten() ? "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-" + "i128:128-f128:64-n32:64-S128-ni:1:10:20" + : "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-" + "i128:128-n32:64-S128-ni:1:10:20"); +} + +static std::string computeVEDataLayout(const Triple &T) { + // Aurora VE is little endian + std::string Ret = "e"; + + // Use ELF mangling + Ret += "-m:e"; + + // Alignments for 64 bit integers. + Ret += "-i64:64"; + + // VE supports 32 bit and 64 bits integer on registers + Ret += "-n32:64"; + + // Stack alignment is 128 bits + Ret += "-S128"; + + // Vector alignments are 64 bits + // Need to define all of them. Otherwise, each alignment becomes + // the size of each data by default. + Ret += "-v64:64:64"; // for v2f32 + Ret += "-v128:64:64"; + Ret += "-v256:64:64"; + Ret += "-v512:64:64"; + Ret += "-v1024:64:64"; + Ret += "-v2048:64:64"; + Ret += "-v4096:64:64"; + Ret += "-v8192:64:64"; + Ret += "-v16384:64:64"; // for v256f64 + + return Ret; +} + +std::string Triple::computeDataLayout(StringRef ABIName) const { + switch (getArch()) { + case Triple::arm: + case Triple::armeb: + case Triple::thumb: + case Triple::thumbeb: + return computeARMDataLayout(*this, ABIName); + case Triple::aarch64: + case Triple::aarch64_be: + case Triple::aarch64_32: + return computeAArch64DataLayout(*this); + case Triple::arc: + return "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-" + "f32:32:32-i64:32-f64:32-a:0:32-n32"; + case Triple::avr: + return "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8:16-a:8"; + case Triple::bpfel: + case Triple::bpfeb: + return computeBPFDataLayout(*this); + case Triple::csky: + return computeCSKYDataLayout(*this); + case Triple::dxil: + return "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-" + "f32:32-f64:64-n8:16:32:64"; + case Triple::hexagon: + return 
"e-m:e-p:32:32:32-a:0-n16:32-" + "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-" + "v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"; + case Triple::loongarch32: + case Triple::loongarch64: + return computeLoongArchDataLayout(*this); + case Triple::m68k: + return computeM68kDataLayout(*this); + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: + case Triple::mips64el: + return computeMipsDataLayout(*this, ABIName); + case Triple::msp430: + return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16"; + case Triple::ppc: + case Triple::ppcle: + case Triple::ppc64: + case Triple::ppc64le: + return computePowerDataLayout(*this); + case Triple::r600: + case Triple::amdgcn: + return computeAMDDataLayout(*this); + case Triple::riscv32: + case Triple::riscv64: + case Triple::riscv32be: + case Triple::riscv64be: + return computeRISCVDataLayout(*this, ABIName); + case Triple::sparc: + case Triple::sparcv9: + case Triple::sparcel: + return computeSparcDataLayout(*this); + case Triple::systemz: + return computeSystemZDataLayout(*this); + case Triple::tce: + case Triple::tcele: + case Triple::x86: + case Triple::x86_64: + return computeX86DataLayout(*this); + case Triple::xcore: + case Triple::xtensa: + return "e-m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32"; + case Triple::nvptx: + case Triple::nvptx64: + return computeNVPTXDataLayout(*this, ABIName); + case Triple::spir: + case Triple::spir64: + case Triple::spirv: + case Triple::spirv32: + case Triple::spirv64: + return computeSPIRVDataLayout(*this); + case Triple::lanai: + return computeLanaiDataLayout(); + case Triple::wasm32: + case Triple::wasm64: + return computeWebAssemblyDataLayout(*this); + case Triple::ve: + return computeVEDataLayout(*this); + + case Triple::amdil: + case Triple::amdil64: + case Triple::hsail: + case Triple::hsail64: + case Triple::kalimba: + case Triple::shave: + case Triple::renderscript32: + case Triple::renderscript64: + // These are all virtual ISAs 
with no LLVM backend, and therefore no fixed + // LLVM data layout. + return ""; + + case Triple::UnknownArch: + return ""; + } + llvm_unreachable("Invalid arch"); +} diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp index afa72a53ab2c0..e0c0f35847f07 100644 --- a/llvm/unittests/IR/DataLayoutTest.cpp +++ b/llvm/unittests/IR/DataLayoutTest.cpp @@ -677,11 +677,4 @@ TEST(DataLayoutTest, VectorAlign) { EXPECT_EQ(Align(4 * 8), DL->getPrefTypeAlign(V8F32Ty)); } -TEST(DataLayoutTest, UEFI) { - Triple TT = Triple("x86_64-unknown-uefi"); - - // Test UEFI X86_64 Mangling Component. - EXPECT_STREQ(DataLayout::getManglingComponent(TT), "-m:w"); -} - } // anonymous namespace diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp index e6979cf49ce82..256756650d21f 100644 --- a/llvm/unittests/TargetParser/TripleTest.cpp +++ b/llvm/unittests/TargetParser/TripleTest.cpp @@ -10,6 +10,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/VersionTuple.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" using namespace llvm; @@ -3300,4 +3301,12 @@ TEST(TripleTest, isCompatibleWith) { EXPECT_TRUE(DoTest(C.B, C.A, C.Result)); } } + +TEST(DataLayoutTest, UEFI) { + Triple TT = Triple("x86_64-unknown-uefi"); + + // Test UEFI X86_64 Mangling Component. 
+ EXPECT_THAT(TT.computeDataLayout(), testing::HasSubstr("-m:w-")); +} + } // end anonymous namespace From e0117a555d3c84a1c8e0101fc46fe3a34fa48ce5 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Thu, 11 Sep 2025 11:07:38 -0700 Subject: [PATCH 012/734] [lldb] Fix undefined behavior (#158119) https://green.lab.llvm.org/job/llvm.org/view/LLDB/job/lldb-cmake-sanitized/2178/consoleText ``` [2025-09-11T13:10:53.352Z] /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake-sanitized/llvm-project/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp:14138:35: runtime error: signed integer overflow: 2147483624 + 608 cannot be represented in type 'int32_t' (aka 'int') [2025-09-11T13:10:53.352Z] SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /Users/ec2-user/jenkins/workspace/llvm.org/lldb-cmake-sanitized/llvm-project/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp:14138:35 in ``` --- .../Plugins/Instruction/ARM/EmulateInstructionARM.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp index 89da4d200699f..f5f077ffb0bfc 100644 --- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp +++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp @@ -14135,7 +14135,13 @@ EmulateInstructionARM::AddWithCarry(uint32_t x, uint32_t y, uint8_t carry_in) { uint8_t overflow; uint64_t unsigned_sum = x + y + carry_in; - int64_t signed_sum = (int32_t)x + (int32_t)y + (int32_t)carry_in; + int64_t signed_sum = 0; + int32_t signed_sum32; + if (llvm::AddOverflow((int32_t)x, (int32_t)y, signed_sum32)) + signed_sum++; + signed_sum += signed_sum32; + + signed_sum += (int32_t)carry_in; result = UnsignedBits(unsigned_sum, 31, 0); // carry_out = (result == unsigned_sum ? 
0 : 1); From 6ab2b8745156269024de9098a4a6495ef19d546e Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 11 Sep 2025 11:38:30 -0700 Subject: [PATCH 013/734] [libc] Clean up errno header usage in some more tests. (#157974) Either remove spurious libc_errno.h which are no longer needed, or migrate some tests to ErrnoCheckingTest to remove manual errno manipulation. --- libc/test/src/__support/CMakeLists.txt | 2 + libc/test/src/__support/str_to_fp_test.h | 1 - .../src/__support/str_to_integer_test.cpp | 1 - .../src/__support/wcs_to_integer_test.cpp | 1 - libc/test/src/poll/CMakeLists.txt | 2 +- libc/test/src/poll/poll_test.cpp | 11 +++--- libc/test/src/spawn/CMakeLists.txt | 2 +- .../spawn/posix_spawn_file_actions_test.cpp | 2 +- libc/test/src/sys/ioctl/linux/CMakeLists.txt | 2 + libc/test/src/sys/ioctl/linux/ioctl_test.cpp | 8 ++-- libc/test/src/termios/CMakeLists.txt | 1 + libc/test/src/termios/termios_test.cpp | 37 ++++++++++--------- 12 files changed, 37 insertions(+), 33 deletions(-) diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 5d1d0e0e5316b..a02514106a307 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -123,6 +123,8 @@ add_libc_test( str_to_float_test.cpp str_to_double_test.cpp str_to_long_double_test.cpp + HDRS + str_to_fp_test.h DEPENDS libc.src.__support.integer_literals libc.src.__support.str_to_float diff --git a/libc/test/src/__support/str_to_fp_test.h b/libc/test/src/__support/str_to_fp_test.h index 9b4844d410db2..d349192f107c0 100644 --- a/libc/test/src/__support/str_to_fp_test.h +++ b/libc/test/src/__support/str_to_fp_test.h @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/str_to_float.h" #include "src/__support/uint128.h" diff --git 
a/libc/test/src/__support/str_to_integer_test.cpp b/libc/test/src/__support/str_to_integer_test.cpp index 40cb76a8bd6a2..1ec882b212b8a 100644 --- a/libc/test/src/__support/str_to_integer_test.cpp +++ b/libc/test/src/__support/str_to_integer_test.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/libc_errno.h" #include "src/__support/str_to_integer.h" #include diff --git a/libc/test/src/__support/wcs_to_integer_test.cpp b/libc/test/src/__support/wcs_to_integer_test.cpp index e4107929c15fc..4554968be67ce 100644 --- a/libc/test/src/__support/wcs_to_integer_test.cpp +++ b/libc/test/src/__support/wcs_to_integer_test.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/libc_errno.h" #include "src/__support/wcs_to_integer.h" #include diff --git a/libc/test/src/poll/CMakeLists.txt b/libc/test/src/poll/CMakeLists.txt index c4af14168b906..54e00330f2bff 100644 --- a/libc/test/src/poll/CMakeLists.txt +++ b/libc/test/src/poll/CMakeLists.txt @@ -10,5 +10,5 @@ add_libc_unittest( libc.hdr.limits_macros libc.src.errno.errno libc.src.poll.poll - libc.test.UnitTest.ErrnoSetterMatcher + libc.test.UnitTest.ErrnoCheckingTest ) diff --git a/libc/test/src/poll/poll_test.cpp b/libc/test/src/poll/poll_test.cpp index 97b7b02718172..5bf2d5e4353f6 100644 --- a/libc/test/src/poll/poll_test.cpp +++ b/libc/test/src/poll/poll_test.cpp @@ -7,18 +7,19 @@ //===----------------------------------------------------------------------===// #include "hdr/limits_macros.h" // UINT_MAX -#include "src/__support/libc_errno.h" #include "src/poll/poll.h" +#include "test/UnitTest/ErrnoCheckingTest.h" #include "test/UnitTest/Test.h" -TEST(LlvmLibcPollTest, SmokeTest) { - libc_errno = 0; +using LlvmLibcPollTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcPollTest, SmokeTest) { int ret = LIBC_NAMESPACE::poll(nullptr, 0, 0); 
ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(0, ret); } -TEST(LlvmLibcPollTest, SmokeFailureTest) { - libc_errno = 0; + +TEST_F(LlvmLibcPollTest, SmokeFailureTest) { int ret = LIBC_NAMESPACE::poll(nullptr, UINT_MAX, 0); ASSERT_ERRNO_EQ(EINVAL); ASSERT_EQ(-1, ret); diff --git a/libc/test/src/spawn/CMakeLists.txt b/libc/test/src/spawn/CMakeLists.txt index 04814db46dca2..103925cf3a22d 100644 --- a/libc/test/src/spawn/CMakeLists.txt +++ b/libc/test/src/spawn/CMakeLists.txt @@ -7,6 +7,7 @@ add_libc_unittest( SRCS posix_spawn_file_actions_test.cpp DEPENDS + libc.hdr.errno_macros libc.hdr.stdint_proxy libc.include.spawn libc.src.spawn.file_actions @@ -15,5 +16,4 @@ add_libc_unittest( libc.src.spawn.posix_spawn_file_actions_addopen libc.src.spawn.posix_spawn_file_actions_destroy libc.src.spawn.posix_spawn_file_actions_init - libc.src.errno.errno ) diff --git a/libc/test/src/spawn/posix_spawn_file_actions_test.cpp b/libc/test/src/spawn/posix_spawn_file_actions_test.cpp index 935a3540d9a58..20ab312f1f999 100644 --- a/libc/test/src/spawn/posix_spawn_file_actions_test.cpp +++ b/libc/test/src/spawn/posix_spawn_file_actions_test.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "hdr/errno_macros.h" #include "hdr/stdint_proxy.h" -#include "src/__support/libc_errno.h" #include "src/spawn/file_actions.h" #include "src/spawn/posix_spawn_file_actions_addclose.h" #include "src/spawn/posix_spawn_file_actions_adddup2.h" diff --git a/libc/test/src/sys/ioctl/linux/CMakeLists.txt b/libc/test/src/sys/ioctl/linux/CMakeLists.txt index 2df67e9d9cbde..2ccef25f4264f 100644 --- a/libc/test/src/sys/ioctl/linux/CMakeLists.txt +++ b/libc/test/src/sys/ioctl/linux/CMakeLists.txt @@ -14,5 +14,7 @@ add_libc_unittest( libc.src.unistd.close libc.src.unistd.read libc.src.unistd.write + libc.test.UnitTest.ErrnoCheckingTest + libc.test.UnitTest.ErrnoSetterMatcher ) diff --git a/libc/test/src/sys/ioctl/linux/ioctl_test.cpp 
b/libc/test/src/sys/ioctl/linux/ioctl_test.cpp index b76dc14824c95..4560bcf6e2e96 100644 --- a/libc/test/src/sys/ioctl/linux/ioctl_test.cpp +++ b/libc/test/src/sys/ioctl/linux/ioctl_test.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/libc_errno.h" #include "src/fcntl/open.h" #include "src/sys/ioctl/ioctl.h" #include "src/unistd/close.h" #include "src/unistd/read.h" #include "src/unistd/write.h" - +#include "test/UnitTest/ErrnoCheckingTest.h" #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" @@ -20,11 +19,10 @@ #include "hdr/sys_ioctl_macros.h" +using LlvmLibcSysIoctlTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; -TEST(LlvmLibcSysIoctlTest, InvalidCommandAndFIONREAD) { - LIBC_NAMESPACE::libc_errno = 0; - +TEST_F(LlvmLibcSysIoctlTest, InvalidCommandAndFIONREAD) { // Setup the test file constexpr const char *TEST_FILE_NAME = "ioctl.test"; constexpr const char TEST_MSG[] = "ioctl test"; diff --git a/libc/test/src/termios/CMakeLists.txt b/libc/test/src/termios/CMakeLists.txt index 302dd300fb59f..059c272c105c4 100644 --- a/libc/test/src/termios/CMakeLists.txt +++ b/libc/test/src/termios/CMakeLists.txt @@ -18,5 +18,6 @@ add_libc_unittest( libc.src.termios.tcgetsid libc.src.termios.tcsetattr libc.src.unistd.close + libc.test.UnitTest.ErrnoCheckingTest libc.test.UnitTest.ErrnoSetterMatcher ) diff --git a/libc/test/src/termios/termios_test.cpp b/libc/test/src/termios/termios_test.cpp index 5ec169a886b1e..7a8075997a4a8 100644 --- a/libc/test/src/termios/termios_test.cpp +++ b/libc/test/src/termios/termios_test.cpp @@ -16,49 +16,52 @@ #include "src/termios/tcgetsid.h" #include "src/termios/tcsetattr.h" #include "src/unistd/close.h" +#include "test/UnitTest/ErrnoCheckingTest.h" #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" #include -using 
LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; -using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; +using LlvmLibcTermiosTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; +using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; // We just list a bunch of smoke tests here as it is not possible to // test functionality at the least because we want to run the tests // from ninja/make which change the terminal behavior. -TEST(LlvmLibcTermiosTest, SpeedSmokeTest) { +TEST_F(LlvmLibcTermiosTest, SpeedSmokeTest) { struct termios t; - libc_errno = 0; ASSERT_THAT(LIBC_NAMESPACE::cfsetispeed(&t, B50), Succeeds(0)); ASSERT_EQ(LIBC_NAMESPACE::cfgetispeed(&t), speed_t(B50)); ASSERT_THAT(LIBC_NAMESPACE::cfsetospeed(&t, B75), Succeeds(0)); ASSERT_EQ(LIBC_NAMESPACE::cfgetospeed(&t), speed_t(B75)); - libc_errno = 0; ASSERT_THAT(LIBC_NAMESPACE::cfsetispeed(&t, ~CBAUD), Fails(EINVAL)); - libc_errno = 0; ASSERT_THAT(LIBC_NAMESPACE::cfsetospeed(&t, ~CBAUD), Fails(EINVAL)); } -TEST(LlvmLibcTermiosTest, GetAttrSmokeTest) { +TEST_F(LlvmLibcTermiosTest, GetAttrSmokeTest) { struct termios t; - libc_errno = 0; int fd = LIBC_NAMESPACE::open("/dev/tty", O_RDONLY); - if (fd < 0) - return; // When /dev/tty is not available, no point continuing. + if (fd < 0) { + // When /dev/tty is not available, no point continuing + libc_errno = 0; + return; + } ASSERT_ERRNO_SUCCESS(); ASSERT_THAT(LIBC_NAMESPACE::tcgetattr(fd, &t), Succeeds(0)); - ASSERT_EQ(LIBC_NAMESPACE::close(fd), 0); + ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0)); } -TEST(LlvmLibcTermiosTest, TcGetSidSmokeTest) { - libc_errno = 0; +TEST_F(LlvmLibcTermiosTest, TcGetSidSmokeTest) { int fd = LIBC_NAMESPACE::open("/dev/tty", O_RDONLY); - if (fd < 0) - return; // When /dev/tty is not available, no point continuing. 
+ if (fd < 0) { + // When /dev/tty is not available, no point continuing + libc_errno = 0; + return; + } ASSERT_ERRNO_SUCCESS(); - ASSERT_GT(LIBC_NAMESPACE::tcgetsid(fd), pid_t(0)); - ASSERT_EQ(LIBC_NAMESPACE::close(fd), 0); + ASSERT_THAT(LIBC_NAMESPACE::tcgetsid(fd), + returns(GT(pid_t(0))).with_errno(EQ(0))); + ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0)); } From 770cd432a692d9c35285fcbbd8e4fcca172ee7d7 Mon Sep 17 00:00:00 2001 From: Druzhkov Sergei Date: Thu, 11 Sep 2025 22:06:46 +0300 Subject: [PATCH 014/734] [lldb-dap] Add invalidated event (#157530) This patch fixes the problem, when after a `setVariable` request pointers and references to the variable are not updated. VSCode doesn't send a `variables` request after a `setVariable` request, so we should trigger it explicitly via an `invalidated` event. Also, updated the `writeMemory` request in a similar way. --- .../test/tools/lldb-dap/dap_server.py | 4 +++ .../test/tools/lldb-dap/lldbdap_testcase.py | 20 +++++++++-- .../tools/lldb-dap/memory/TestDAP_memory.py | 4 +-- .../lldb-dap/variables/TestDAP_variables.py | 34 ++++++------------- lldb/tools/lldb-dap/EventHelper.cpp | 11 ++++++ lldb/tools/lldb-dap/EventHelper.h | 5 +++ .../Handler/SetVariableRequestHandler.cpp | 5 +++ .../Handler/WriteMemoryRequestHandler.cpp | 16 ++++++--- .../lldb-dap/Protocol/ProtocolEvents.cpp | 23 +++++++++++++ lldb/tools/lldb-dap/Protocol/ProtocolEvents.h | 32 +++++++++++++++++ lldb/unittests/DAP/ProtocolTypesTest.cpp | 15 ++++++++ 11 files changed, 136 insertions(+), 33 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 51debcf477a9d..9fe8ca22e820b 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -215,6 +215,7 @@ def __init__( self.terminated: bool = False self.events: List[Event] = [] self.progress_events: List[Event] =
[] + self.invalidated_event: Optional[Event] = None self.reverse_requests: List[Request] = [] self.module_events: List[Dict] = [] self.sequence: int = 1 @@ -440,6 +441,8 @@ def _handle_event(self, packet: Event) -> None: elif event == "capabilities" and body: # Update the capabilities with new ones from the event. self.capabilities.update(body["capabilities"]) + elif event == "invalidated": + self.invalidated_event = packet def _handle_reverse_request(self, request: Request) -> None: if request in self.reverse_requests: @@ -1014,6 +1017,7 @@ def request_initialize(self, sourceInitFile=False): "supportsVariableType": True, "supportsStartDebuggingRequest": True, "supportsProgressReporting": True, + "supportsInvalidatedEvent": True, "$__lldb_sourceInitFile": sourceInitFile, }, } diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index fffd4c23d6fcd..a0a009ae6cc9a 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -241,6 +241,13 @@ def verify_commands(self, flavor: str, output: str, commands: list[str]): f"Command '{flavor}' - '{cmd}' not found in output: {output}", ) + def verify_invalidated_event(self, expected_areas): + event = self.dap_server.invalidated_event + self.dap_server.invalidated_event = None + self.assertIsNotNone(event) + areas = event["body"].get("areas", []) + self.assertEqual(set(expected_areas), set(areas)) + def get_dict_value(self, d: dict, key_path: list[str]) -> Any: """Verify each key in the key_path array is in contained in each dictionary within "d". 
Assert if any key isn't in the @@ -352,13 +359,20 @@ def get_local_as_int(self, name, threadId=None): else: return int(value) + def set_variable(self, varRef, name, value, id=None): + """Set a variable.""" + response = self.dap_server.request_setVariable(varRef, name, str(value), id=id) + if response["success"]: + self.verify_invalidated_event(["variables"]) + return response + def set_local(self, name, value, id=None): """Set a top level local variable only.""" - return self.dap_server.request_setVariable(1, name, str(value), id=id) + return self.set_variable(1, name, str(value), id=id) def set_global(self, name, value, id=None): """Set a top level global variable only.""" - return self.dap_server.request_setVariable(2, name, str(value), id=id) + return self.set_variable(2, name, str(value), id=id) def stepIn( self, @@ -577,4 +591,6 @@ def writeMemory(self, memoryReference, data=None, offset=0, allowPartial=False): response = self.dap_server.request_writeMemory( memoryReference, encodedData, offset=offset, allowPartial=allowPartial ) + if response["success"]: + self.verify_invalidated_event(["all"]) return response diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py index f51056d7020c6..7c9ad0c0f75ee 100644 --- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py +++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py @@ -72,9 +72,7 @@ def test_memory_refs_set_variable(self): ptr_value = self.get_local_as_int("rawptr") self.assertIn( "memoryReference", - self.dap_server.request_setVariable(1, "rawptr", ptr_value + 2)[ - "body" - ].keys(), + self.set_local("rawptr", ptr_value + 2)["body"].keys(), ) @skipIfWindows diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py index a3a4bdaaf40a6..13a694602f230 100644 --- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py +++ 
b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py @@ -298,7 +298,7 @@ def do_test_scopes_variables_setVariable_evaluate( # Set a variable value whose name is synthetic, like a variable index # and verify the value by reading it variable_value = 100 - response = self.dap_server.request_setVariable(varRef, "[0]", variable_value) + response = self.set_variable(varRef, "[0]", variable_value) # Verify dap sent the correct response verify_response = { "type": "int", @@ -315,7 +315,7 @@ def do_test_scopes_variables_setVariable_evaluate( # Set a variable value whose name is a real child value, like "pt.x" # and verify the value by reading it varRef = varref_dict["pt"] - self.dap_server.request_setVariable(varRef, "x", 111) + self.set_variable(varRef, "x", 111) response = self.dap_server.request_variables(varRef, start=0, count=1) value = response["body"]["variables"][0]["value"] self.assertEqual( @@ -341,27 +341,15 @@ def do_test_scopes_variables_setVariable_evaluate( self.verify_variables(verify_locals, self.dap_server.get_local_variables()) # Now we verify that we correctly change the name of a variable with and without differentiator suffix - self.assertFalse(self.dap_server.request_setVariable(1, "x2", 9)["success"]) - self.assertFalse( - self.dap_server.request_setVariable(1, "x @ main.cpp:0", 9)["success"] - ) + self.assertFalse(self.set_local("x2", 9)["success"]) + self.assertFalse(self.set_local("x @ main.cpp:0", 9)["success"]) - self.assertTrue( - self.dap_server.request_setVariable(1, "x @ main.cpp:19", 19)["success"] - ) - self.assertTrue( - self.dap_server.request_setVariable(1, "x @ main.cpp:21", 21)["success"] - ) - self.assertTrue( - self.dap_server.request_setVariable(1, "x @ main.cpp:23", 23)["success"] - ) + self.assertTrue(self.set_local("x @ main.cpp:19", 19)["success"]) + self.assertTrue(self.set_local("x @ main.cpp:21", 21)["success"]) + self.assertTrue(self.set_local("x @ main.cpp:23", 23)["success"]) # The following should have no effect 
- self.assertFalse( - self.dap_server.request_setVariable(1, "x @ main.cpp:23", "invalid")[ - "success" - ] - ) + self.assertFalse(self.set_local("x @ main.cpp:23", "invalid")["success"]) verify_locals["x @ main.cpp:19"]["equals"]["value"] = "19" verify_locals["x @ main.cpp:21"]["equals"]["value"] = "21" @@ -370,7 +358,7 @@ def do_test_scopes_variables_setVariable_evaluate( self.verify_variables(verify_locals, self.dap_server.get_local_variables()) # The plain x variable shold refer to the innermost x - self.assertTrue(self.dap_server.request_setVariable(1, "x", 22)["success"]) + self.assertTrue(self.set_local("x", 22)["success"]) verify_locals["x @ main.cpp:23"]["equals"]["value"] = "22" self.verify_variables(verify_locals, self.dap_server.get_local_variables()) @@ -708,9 +696,7 @@ def test_return_variables(self): self.verify_variables(verify_locals, local_variables, varref_dict) break - self.assertFalse( - self.dap_server.request_setVariable(1, "(Return Value)", 20)["success"] - ) + self.assertFalse(self.set_local("(Return Value)", 20)["success"]) @skipIfWindows def test_indexedVariables(self): diff --git a/lldb/tools/lldb-dap/EventHelper.cpp b/lldb/tools/lldb-dap/EventHelper.cpp index ecd630cb530d6..6eb468e76b16c 100644 --- a/lldb/tools/lldb-dap/EventHelper.cpp +++ b/lldb/tools/lldb-dap/EventHelper.cpp @@ -12,9 +12,11 @@ #include "JSONUtils.h" #include "LLDBUtils.h" #include "Protocol/ProtocolEvents.h" +#include "Protocol/ProtocolRequests.h" #include "Protocol/ProtocolTypes.h" #include "lldb/API/SBFileSpec.h" #include "llvm/Support/Error.h" +#include #if defined(_WIN32) #define NOMINMAX @@ -273,4 +275,13 @@ void SendProcessExitedEvent(DAP &dap, lldb::SBProcess &process) { dap.SendJSON(llvm::json::Value(std::move(event))); } +void SendInvalidatedEvent( + DAP &dap, llvm::ArrayRef areas) { + if (!dap.clientFeatures.contains(protocol::eClientFeatureInvalidatedEvent)) + return; + protocol::InvalidatedEventBody body; + body.areas = areas; + 
dap.Send(protocol::Event{"invalidated", std::move(body)}); +} + } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/EventHelper.h b/lldb/tools/lldb-dap/EventHelper.h index 592c1b81c46af..0c57afbaf1f33 100644 --- a/lldb/tools/lldb-dap/EventHelper.h +++ b/lldb/tools/lldb-dap/EventHelper.h @@ -10,6 +10,8 @@ #define LLDB_TOOLS_LLDB_DAP_EVENTHELPER_H #include "DAPForward.h" +#include "Protocol/ProtocolEvents.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Error.h" namespace lldb_dap { @@ -32,6 +34,9 @@ void SendContinuedEvent(DAP &dap); void SendProcessExitedEvent(DAP &dap, lldb::SBProcess &process); +void SendInvalidatedEvent( + DAP &dap, llvm::ArrayRef areas); + } // namespace lldb_dap #endif diff --git a/lldb/tools/lldb-dap/Handler/SetVariableRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/SetVariableRequestHandler.cpp index d07c0d6c9afa8..2a50dea0b4ada 100644 --- a/lldb/tools/lldb-dap/Handler/SetVariableRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/SetVariableRequestHandler.cpp @@ -9,6 +9,7 @@ #include "DAP.h" #include "EventHelper.h" #include "JSONUtils.h" +#include "Protocol/ProtocolEvents.h" #include "RequestHandler.h" using namespace lldb_dap::protocol; @@ -77,6 +78,10 @@ SetVariableRequestHandler::Run(const SetVariableArguments &args) const { if (ValuePointsToCode(variable)) body.valueLocationReference = new_var_ref; + // Also send invalidated event to signal client that some variables + // (e.g. references) can be changed. 
+ SendInvalidatedEvent(dap, {InvalidatedEventBody::eAreaVariables}); + return body; } diff --git a/lldb/tools/lldb-dap/Handler/WriteMemoryRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/WriteMemoryRequestHandler.cpp index 313f59dceab24..3e34e488d1158 100644 --- a/lldb/tools/lldb-dap/Handler/WriteMemoryRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/WriteMemoryRequestHandler.cpp @@ -7,21 +7,24 @@ //===----------------------------------------------------------------------===// #include "DAP.h" +#include "EventHelper.h" #include "JSONUtils.h" +#include "Protocol/ProtocolEvents.h" #include "RequestHandler.h" #include "lldb/API/SBMemoryRegionInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Base64.h" +using namespace lldb_dap::protocol; + namespace lldb_dap { // Writes bytes to memory at the provided location. // // Clients should only call this request if the corresponding capability // supportsWriteMemoryRequest is true. -llvm::Expected -WriteMemoryRequestHandler::Run( - const protocol::WriteMemoryArguments &args) const { +llvm::Expected +WriteMemoryRequestHandler::Run(const WriteMemoryArguments &args) const { const lldb::addr_t address = args.memoryReference + args.offset; lldb::SBProcess process = dap.target.GetProcess(); @@ -91,8 +94,13 @@ WriteMemoryRequestHandler::Run( if (bytes_written == 0) { return llvm::make_error(write_error.GetCString()); } - protocol::WriteMemoryResponseBody response; + WriteMemoryResponseBody response; response.bytesWritten = bytes_written; + + // Also send invalidated event to signal client that some things + // (e.g. variables) can be changed. 
+ SendInvalidatedEvent(dap, {InvalidatedEventBody::eAreaAll}); + return response; } diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp index 4faf65567c3ea..9598c69878d66 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp @@ -33,4 +33,27 @@ json::Value toJSON(const ModuleEventBody &MEB) { return json::Object{{"reason", MEB.reason}, {"module", MEB.module}}; } +llvm::json::Value toJSON(const InvalidatedEventBody::Area &IEBA) { + switch (IEBA) { + case InvalidatedEventBody::eAreaAll: + return "all"; + case InvalidatedEventBody::eAreaStacks: + return "stacks"; + case InvalidatedEventBody::eAreaThreads: + return "threads"; + case InvalidatedEventBody::eAreaVariables: + return "variables"; + } + llvm_unreachable("unhandled invalidated event area!."); +} + +llvm::json::Value toJSON(const InvalidatedEventBody &IEB) { + json::Object Result{{"areas", IEB.areas}}; + if (IEB.threadId) + Result.insert({"threadID", IEB.threadId}); + if (IEB.frameId) + Result.insert({"frameId", IEB.frameId}); + return Result; +} + } // namespace lldb_dap::protocol diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolEvents.h b/lldb/tools/lldb-dap/Protocol/ProtocolEvents.h index ee9e03c499eae..138b622e01210 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolEvents.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolEvents.h @@ -21,7 +21,11 @@ #define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_EVENTS_H #include "Protocol/ProtocolTypes.h" +#include "lldb/lldb-types.h" #include "llvm/Support/JSON.h" +#include +#include +#include namespace lldb_dap::protocol { @@ -56,6 +60,34 @@ struct ModuleEventBody { llvm::json::Value toJSON(const ModuleEventBody::Reason &); llvm::json::Value toJSON(const ModuleEventBody &); +/// This event signals that some state in the debug adapter has changed and +/// requires that the client needs to re-render the data snapshot previously +/// requested. 
+/// +/// Debug adapters do not have to emit this event for runtime changes like +/// stopped or thread events because in that case the client refetches the new +/// state anyway. But the event can be used for example to refresh the UI after +/// rendering formatting has changed in the debug adapter. +/// +/// This event should only be sent if the corresponding capability +/// supportsInvalidatedEvent is true. +struct InvalidatedEventBody { + enum Area : unsigned { eAreaAll, eAreaStacks, eAreaThreads, eAreaVariables }; + + /// Set of logical areas that got invalidated. + std::vector areas; + + /// If specified, the client only needs to refetch data related to this + /// thread. + std::optional threadId; + + /// If specified, the client only needs to refetch data related to this stack + /// frame (and the `threadId` is ignored). + std::optional frameId; +}; +llvm::json::Value toJSON(const InvalidatedEventBody::Area &); +llvm::json::Value toJSON(const InvalidatedEventBody &); + } // end namespace lldb_dap::protocol #endif diff --git a/lldb/unittests/DAP/ProtocolTypesTest.cpp b/lldb/unittests/DAP/ProtocolTypesTest.cpp index c5d47fcb08da4..a964592495347 100644 --- a/lldb/unittests/DAP/ProtocolTypesTest.cpp +++ b/lldb/unittests/DAP/ProtocolTypesTest.cpp @@ -1073,3 +1073,18 @@ TEST(ProtocolTypesTest, CompletionsResponseBody) { ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); EXPECT_EQ(pp(*expected), pp(response)); } + +TEST(ProtocolTypesTest, InvalidatedEventBody) { + InvalidatedEventBody body; + body.areas = {InvalidatedEventBody::eAreaStacks, + InvalidatedEventBody::eAreaThreads}; + body.frameId = 1; + StringRef json = R"({ + "areas": [ + "stacks", + "threads" + ], + "frameId": 1 +})"; + EXPECT_EQ(json, pp(body)); +} From d5e7c27d53887e6ae490d8e26193a54987728458 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 11 Sep 2025 20:09:10 +0100 Subject: [PATCH 015/734] [SCEVExp] Remove special-case handling umul_with_overflow by 1 (NFCI). 
b50ad945dd4faa288 added umul_with_overflow simplifications to InstSimplifyFolder (used by SCEVExpander) and 9b1b93766dfa34ee9 added dead instruction cleanup to SCEVExpander. Remove special handling of umul by 1, handled automatically due to the changes above. --- .../Utils/ScalarEvolutionExpander.cpp | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 28befd0aa1ce8..45cee1e7da625 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2222,20 +2222,11 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, // Get the backedge taken count and truncate or extended to the AR type. Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty); - Value *MulV, *OfMul; - if (Step->isOne()) { - // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't - // needed, there is never an overflow, so to avoid artificially inflating - // the cost of the check, directly emit the optimized IR. 
- MulV = TruncTripCount; - OfMul = ConstantInt::getFalse(MulV->getContext()); - } else { - CallInst *Mul = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, Ty, - {AbsStep, TruncTripCount}, - /*FMFSource=*/nullptr, "mul"); - MulV = Builder.CreateExtractValue(Mul, 0, "mul.result"); - OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow"); - } + CallInst *Mul = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, Ty, + {AbsStep, TruncTripCount}, + /*FMFSource=*/nullptr, "mul"); + Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result"); + Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow"); Value *Add = nullptr, *Sub = nullptr; bool NeedPosCheck = !SE.isKnownNegative(Step); From 162755bd393b751529925c814005c79d0327c1d3 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 15:44:37 -0400 Subject: [PATCH 016/734] [libc++] Add a workflow that builds benchmarks when commenting on a PR This workflow is still being tested, since I can't figure out how to trigger it without actually merging it to main. --- .github/workflows/libcxx-run-benchmarks.yml | 66 +++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/libcxx-run-benchmarks.yml diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml new file mode 100644 index 0000000000000..6ca49d0f04dc9 --- /dev/null +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -0,0 +1,66 @@ +# This file defines a workflow that runs the libc++ benchmarks when a comment is added to the PR. +# +# The comment is of the form: +# +# /libcxx-bot benchmark +# +# That will cause the specified benchmarks to be run on the PR and on the pull-request target, and +# their results to be compared.
+ +name: Benchmark libc++ + +permissions: + contents: read # Default everything to read-only + +on: + issue_comment: + types: + - created + - edited + +env: + CC: clang-22 + CXX: clang++-22 + COMMENT_BODY: ${{ github.event.comment.body }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + run-benchmarks: + if: >- + github.event.issue.pull_request && + contains(github.event.comment.body, '/libcxx-bot benchmark') + + runs-on: llvm-premerge-libcxx-next-runners # TODO: This should run on a dedicated set of machines + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + fetch-tags: true # This job requires access to all the Git branches so it can diff against (usually) main + + - uses: actions/setup-python@v6 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python3 -m venv .venv + source .venv/bin/activate + python -m pip install -r libcxx/utils/requirements.txt + + - name: Run baseline + run: | + BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -n 's/\/libcxx-bot benchmark (.+)/\1/p') + baseline_commit=$(git merge-base refs/remotes/origin/${GITHUB_BASE_REF} ${GITHUB_SHA}) + ./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${BENCHMARKS} + + - name: Run candidate + run: | + BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -n 's/\/libcxx-bot benchmark (.+)/\1/p') + ./libcxx/utils/test-at-commit --commit ${GITHUB_SHA} -B build/candidate -- -sv -j1 --param optimization=speed ${BENCHMARKS} + + - name: Compare baseline and candidate runs + run: ./libcxx/utils/compare-benchmarks <(./libcxx/utils/consolidate-benchmarks build/baseline) \ + <(./libcxx/utils/consolidate-benchmarks build/candidate) From b4650a4378d58bd5a9d260994062e2b7f8a78d10 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 11 Sep 2025 12:46:47 -0700 Subject: [PATCH 017/734] [libc][bazel] Add tests 
and targets for inttypes (#158127) Adds tests and targets for the remaining inttypes functions. --- .../llvm-project-overlay/libc/BUILD.bazel | 22 +++++++++++++++++++ .../libc/test/src/inttypes/BUILD.bazel | 18 +++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 3e62769dd0077..09cfa4c7af02e 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -4609,6 +4609,28 @@ libc_math_function(name = "ufromfpxf16") ############################## inttypes targets ############################## +libc_function( + name = "strtoimax", + srcs = ["src/inttypes/strtoimax.cpp"], + hdrs = ["src/inttypes/strtoimax.h"], + deps = [ + ":__support_common", + ":__support_str_to_integer", + ":errno", + ], +) + +libc_function( + name = "strtoumax", + srcs = ["src/inttypes/strtoumax.cpp"], + hdrs = ["src/inttypes/strtoumax.h"], + deps = [ + ":__support_common", + ":__support_str_to_integer", + ":errno", + ], +) + libc_function( name = "imaxabs", srcs = ["src/inttypes/imaxabs.cpp"], diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/inttypes/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/inttypes/BUILD.bazel index 3dd4ab379efe0..03b8cbeecc247 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/inttypes/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/inttypes/BUILD.bazel @@ -10,6 +10,24 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) +libc_test( + name = "strtoimax_test", + srcs = ["strtoimax_test.cpp"], + deps = [ + "//libc:strtoimax", + "//libc/test/src/stdlib:strtol_test_helper", + ], +) + +libc_test( + name = "strtoumax_test", + srcs = ["strtoumax_test.cpp"], + deps = [ + "//libc:strtoumax", + "//libc/test/src/stdlib:strtol_test_helper", + ], +) + libc_test( name = "imaxabs_test", srcs = ["imaxabs_test.cpp"], From 
0ab2df2e47c556a9ed892d4962c803595a219a72 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 11 Sep 2025 12:47:28 -0700 Subject: [PATCH 018/734] [libc][bazel] add tests and targets for ctype (#158124) Adds tests and targets for all the ctype functions. --- .../llvm-project-overlay/libc/BUILD.bazel | 162 ++++++++++++++++++ .../libc/test/src/ctype/BUILD.bazel | 147 ++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100644 utils/bazel/llvm-project-overlay/libc/test/src/ctype/BUILD.bazel diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 09cfa4c7af02e..d9b1bb5635aaf 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1818,6 +1818,168 @@ libc_support_library( ], ) +################################# ctype targets ################################ + +libc_function( + name = "isalnum", + srcs = ["src/ctype/isalnum.cpp"], + hdrs = ["src/ctype/isalnum.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isalpha", + srcs = ["src/ctype/isalpha.cpp"], + hdrs = ["src/ctype/isalpha.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isascii", + srcs = ["src/ctype/isascii.cpp"], + hdrs = ["src/ctype/isascii.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isblank", + srcs = ["src/ctype/isblank.cpp"], + hdrs = ["src/ctype/isblank.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "iscntrl", + srcs = ["src/ctype/iscntrl.cpp"], + hdrs = ["src/ctype/iscntrl.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isdigit", + srcs = ["src/ctype/isdigit.cpp"], + hdrs = ["src/ctype/isdigit.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + 
name = "isgraph", + srcs = ["src/ctype/isgraph.cpp"], + hdrs = ["src/ctype/isgraph.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "islower", + srcs = ["src/ctype/islower.cpp"], + hdrs = ["src/ctype/islower.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isprint", + srcs = ["src/ctype/isprint.cpp"], + hdrs = ["src/ctype/isprint.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "ispunct", + srcs = ["src/ctype/ispunct.cpp"], + hdrs = ["src/ctype/ispunct.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isspace", + srcs = ["src/ctype/isspace.cpp"], + hdrs = ["src/ctype/isspace.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isupper", + srcs = ["src/ctype/isupper.cpp"], + hdrs = ["src/ctype/isupper.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "isxdigit", + srcs = ["src/ctype/isxdigit.cpp"], + hdrs = ["src/ctype/isxdigit.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "toascii", + srcs = ["src/ctype/toascii.cpp"], + hdrs = ["src/ctype/toascii.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "tolower", + srcs = ["src/ctype/tolower.cpp"], + hdrs = ["src/ctype/tolower.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + +libc_function( + name = "toupper", + srcs = ["src/ctype/toupper.cpp"], + hdrs = ["src/ctype/toupper.h"], + deps = [ + ":__support_common", + ":__support_ctype_utils", + ], +) + ################################ fenv targets ################################ libc_function( diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/ctype/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/libc/test/src/ctype/BUILD.bazel new file mode 100644 index 0000000000000..76d0dcae930bb --- /dev/null +++ b/utils/bazel/llvm-project-overlay/libc/test/src/ctype/BUILD.bazel @@ -0,0 +1,147 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Tests for LLVM libc ctype.h functions. + +load("//libc/test:libc_test_rules.bzl", "libc_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +libc_test( + name = "isalnum_test", + srcs = ["isalnum_test.cpp"], + deps = [ + "//libc:__support_cpp_span", + "//libc:isalnum", + ], +) + +libc_test( + name = "islpha_test", + srcs = ["isalpha_test.cpp"], + deps = [ + "//libc:__support_cpp_span", + "//libc:isalpha", + ], +) + +libc_test( + name = "isascii_test", + srcs = ["isascii_test.cpp"], + deps = [ + "//libc:isascii", + ], +) + +libc_test( + name = "isblank_test", + srcs = ["isblank_test.cpp"], + deps = [ + "//libc:isblank", + ], +) + +libc_test( + name = "iscntrl_test", + srcs = ["iscntrl_test.cpp"], + deps = [ + "//libc:iscntrl", + ], +) + +libc_test( + name = "isdigit_test", + srcs = ["isdigit_test.cpp"], + deps = [ + "//libc:__support_cpp_span", + "//libc:isdigit", + ], +) + +libc_test( + name = "isgraph_test", + srcs = ["isgraph_test.cpp"], + deps = [ + "//libc:isgraph", + ], +) + +libc_test( + name = "islower_test", + srcs = ["islower_test.cpp"], + deps = [ + "//libc:__support_cpp_span", + "//libc:islower", + ], +) + +libc_test( + name = "isprint_test", + srcs = ["isprint_test.cpp"], + deps = [ + "//libc:isprint", + ], +) + +libc_test( + name = "ispunct_test", + srcs = ["ispunct_test.cpp"], + deps = [ + "//libc:ispunct", + ], +) + +libc_test( + name = "isspace_test", + srcs = ["isspace_test.cpp"], + deps = [ + "//libc:isspace", + ], +) + +libc_test( + name = "isupper_test", + srcs = ["isupper_test.cpp"], + 
deps = [ + "//libc:__support_cpp_span", + "//libc:isupper", + ], +) + +libc_test( + name = "isxdigit_test", + srcs = ["isxdigit_test.cpp"], + deps = [ + "//libc:__support_cpp_span", + "//libc:isxdigit", + ], +) + +libc_test( + name = "toascii_test", + srcs = ["toascii_test.cpp"], + deps = [ + "//libc:toascii", + ], +) + +libc_test( + name = "tolower_test", + srcs = ["tolower_test.cpp"], + deps = [ + "//libc:__support_cpp_span", + "//libc:tolower", + ], +) + +libc_test( + name = "toupper_test", + srcs = ["toupper_test.cpp"], + deps = [ + "//libc:__support_cpp_span", + "//libc:toupper", + ], +) From e08588d4ae3ed7c81de08aaf88f3454b4985f1b3 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 15:52:32 -0400 Subject: [PATCH 019/734] [libc++] Get rid of concurrency in the libc++ benchmarking job --- .github/workflows/libcxx-build-and-test.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 2e6ff7f91b6fc..c46a8c3c2b8dc 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -29,10 +29,6 @@ on: permissions: contents: read # Default everything to read-only -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number }} - cancel-in-progress: true - jobs: stage1: if: github.repository_owner == 'llvm' From 0e3c5566c0c62a56629a927d7de5e2594d2dbe7c Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Thu, 11 Sep 2025 15:55:18 -0400 Subject: [PATCH 020/734] [PGO] Add llvm.loop.estimated_trip_count metadata (#152775) This patch implements the `llvm.loop.estimated_trip_count` metadata discussed in [[RFC] Fix Loop Transformations to Preserve Block Frequencies](https://discourse.llvm.org/t/rfc-fix-loop-transformations-to-preserve-block-frequencies/85785). 
As the RFC explains, that metadata enables future patches, such as PR #128785, to fix block frequency issues without losing estimated trip counts. --- llvm/docs/LangRef.rst | 48 ++++ llvm/include/llvm/IR/Metadata.h | 4 +- llvm/include/llvm/IR/ProfDataUtils.h | 4 + .../include/llvm/Transforms/Utils/LoopUtils.h | 50 +++- llvm/lib/IR/ProfDataUtils.cpp | 1 + llvm/lib/IR/Verifier.cpp | 12 + llvm/lib/Transforms/Utils/LoopUtils.cpp | 144 +++++++++--- .../LoopVectorize/AArch64/check-prof-info.ll | 218 +++++++++--------- .../Transforms/LoopVectorize/X86/pr81872.ll | 14 +- .../LoopVectorize/branch-weights.ll | 56 +++-- .../llvm.loop.estimated_trip_count.ll | 61 +++++ .../Transforms/Utils/LoopUtilsTest.cpp | 53 +++++ 12 files changed, 487 insertions(+), 178 deletions(-) create mode 100644 llvm/test/Verifier/llvm.loop.estimated_trip_count.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 43d31b03932cf..d61ea07830123 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7840,6 +7840,54 @@ If a loop was successfully processed by the loop distribution pass, this metadata is added (i.e., has been distributed). See :ref:`Transformation Metadata ` for details. +'``llvm.loop.estimated_trip_count``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata records an estimated trip count for the loop. The first operand +is the string ``llvm.loop.estimated_trip_count``. The second operand is an +integer constant of type ``i32`` or smaller specifying the estimate. For +example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.estimated_trip_count", i32 8} + +Purpose +""""""" + +A loop's estimated trip count is an estimate of the average number of loop +iterations (specifically, the number of times the loop's header executes) each +time execution reaches the loop. It is usually only an estimate based on, for +example, profile data. The actual number of iterations might vary widely. 
+ +The estimated trip count serves as a parameter for various loop transformations +and typically helps estimate transformation cost. For example, it can help +determine how many iterations to peel or how aggressively to unroll. + +Initialization and Maintenance +"""""""""""""""""""""""""""""" + +Passes should interact with estimated trip counts always via +``llvm::getLoopEstimatedTripCount`` and ``llvm::setLoopEstimatedTripCount``. + +When the ``llvm.loop.estimated_trip_count`` metadata is not present on a loop, +``llvm::getLoopEstimatedTripCount`` estimates the loop's trip count from the +loop's ``branch_weights`` metadata under the assumption that the latter still +accurately encodes the program's original profile data. However, as passes +transform existing loops and create new loops, they must be free to update and +create ``branch_weights`` metadata in a way that maintains accurate block +frequencies. Trip counts estimated from this new ``branch_weights`` metadata +are not necessarily useful to the passes that consume estimated trip counts. + +For this reason, when a pass transforms or creates loops, the pass should +separately estimate new trip counts based on the estimated trip counts that +``llvm::getLoopEstimatedTripCount`` returns at the start of the pass, and the +pass should record the new estimates by calling +``llvm::setLoopEstimatedTripCount``, which creates or updates +``llvm.loop.estimated_trip_count`` metadata. Once this metadata is present on a +loop, ``llvm::getLoopEstimatedTripCount`` returns its value instead of +estimating the trip count from the loop's ``branch_weights`` metadata. + '``llvm.licm.disable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 33203ad85aa32..4ba31b5545cb2 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -919,8 +919,8 @@ class MDOperand { // Check if MDOperand is of type MDString and equals `Str`. 
bool equalsStr(StringRef Str) const { - return isa(this->get()) && - cast(this->get())->getString() == Str; + return isa_and_nonnull(get()) && + cast(get())->getString() == Str; } ~MDOperand() { untrack(); } diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 61434735506f9..ce9f4c2de2cae 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -30,6 +30,10 @@ struct MDProfLabels { LLVM_ABI static const char *UnknownBranchWeightsMarker; }; +/// Profile-based loop metadata that should be accessed only by using +/// \c llvm::getLoopEstimatedTripCount and \c llvm::setLoopEstimatedTripCount. +LLVM_ABI extern const char *LLVMLoopEstimatedTripCount; + /// Checks if an Instruction has MD_prof Metadata LLVM_ABI bool hasProfMD(const Instruction &I); diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 96e3d3d47f2d0..5bef67eb021ca 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -323,22 +323,48 @@ LLVM_ABI TransformationMode hasLICMVersioningTransformation(const Loop *L); LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); -/// Returns a loop's estimated trip count based on branch weight metadata. -/// In addition if \p EstimatedLoopInvocationWeight is not null it is -/// initialized with weight of loop's latch leading to the exit. -/// Returns a valid positive trip count, saturated at UINT_MAX, or std::nullopt -/// when a meaningful estimate cannot be made. +/// Return either: +/// - \c std::nullopt, if the implementation is unable to handle the loop form +/// of \p L (e.g., \p L must have a latch block that controls the loop exit). +/// - The value of \c llvm.loop.estimated_trip_count from the loop metadata of +/// \p L, if that metadata is present. 
+/// - Else, a new estimate of the trip count from the latch branch weights of +/// \p L. +/// +/// An estimated trip count is always a valid positive trip count, saturated at +/// \c UINT_MAX. +/// +/// In addition, if \p EstimatedLoopInvocationWeight, then either: +/// - Set \c *EstimatedLoopInvocationWeight to the weight of the latch's branch +/// to the loop exit. +/// - Do not set it, and return \c std::nullopt, if the current implementation +/// cannot compute that weight (e.g., if \p L does not have a latch block that +/// controls the loop exit) or the weight is zero (because zero cannot be +/// used to compute new branch weights that reflect the estimated trip count). +/// +/// TODO: Eventually, once all passes have migrated away from setting branch +/// weights to indicate estimated trip counts, this function will drop the +/// \p EstimatedLoopInvocationWeight parameter. LLVM_ABI std::optional getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight = nullptr); -/// Set a loop's branch weight metadata to reflect that loop has \p -/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits -/// through latch. Returns true if metadata is successfully updated, false -/// otherwise. Note that loop must have a latch block which controls loop exit -/// in order to succeed. -LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, - unsigned EstimatedLoopInvocationWeight); +/// Set \c llvm.loop.estimated_trip_count with the value \p EstimatedTripCount +/// in the loop metadata of \p L. Return false if the implementation is unable +/// to handle the loop form of \p L (e.g., \p L must have a latch block that +/// controls the loop exit). Otherwise, return true. 
+/// +/// In addition, if \p EstimatedLoopInvocationWeight, set the branch weight +/// metadata of \p L to reflect that \p L has an estimated +/// \p EstimatedTripCount iterations and has \c *EstimatedLoopInvocationWeight +/// exit weight through the loop's latch. +/// +/// TODO: Eventually, once all passes have migrated away from setting branch +/// weights to indicate estimated trip counts, this function will drop the +/// \p EstimatedLoopInvocationWeight parameter. +LLVM_ABI bool setLoopEstimatedTripCount( + Loop *L, unsigned EstimatedTripCount, + std::optional EstimatedLoopInvocationWeight = std::nullopt); /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. If the loop has no parent, this is trivially diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index d0b91d9356613..5827292cee39b 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -95,6 +95,7 @@ const char *MDProfLabels::FunctionEntryCount = "function_entry_count"; const char *MDProfLabels::SyntheticFunctionEntryCount = "synthetic_function_entry_count"; const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown"; +const char *LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count"; bool hasProfMD(const Instruction &I) { return I.hasMetadata(LLVMContext::MD_prof); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 7d362ce308812..c06b60fd2d9a9 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1076,6 +1076,18 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { } } + // Check llvm.loop.estimated_trip_count. 
+ if (MD.getNumOperands() > 0 && + MD.getOperand(0).equalsStr(LLVMLoopEstimatedTripCount)) { + Check(MD.getNumOperands() == 2, "Expected two operands", &MD); + auto *Count = dyn_cast_or_null(MD.getOperand(1)); + Check(Count && Count->getType()->isIntegerTy() && + cast(Count->getType())->getBitWidth() <= 32, + "Expected second operand to be an integer constant of type i32 or " + "smaller", + &MD); + } + // Check these last, so we diagnose problems in operands first. Check(!MD.isTemporary(), "Expected no forward declarations!", &MD); Check(MD.isResolved(), "All nodes should be resolved!", &MD); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index b172ef6ba0803..7b1a7ce6995f8 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -804,26 +804,51 @@ static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) { return LatchBR; } -/// Return the estimated trip count for any exiting branch which dominates -/// the loop latch. -static std::optional getEstimatedTripCount(BranchInst *ExitingBranch, - Loop *L, - uint64_t &OrigExitWeight) { +struct DbgLoop { + const Loop *L; + explicit DbgLoop(const Loop *L) : L(L) {} +}; + +#ifndef NDEBUG +static inline raw_ostream &operator<<(raw_ostream &OS, DbgLoop D) { + OS << "function "; + D.L->getHeader()->getParent()->printAsOperand(OS, /*PrintType=*/false); + return OS << " " << *D.L; +} +#endif // NDEBUG + +static std::optional estimateLoopTripCount(Loop *L) { + // Currently we take the estimate exit count only from the loop latch, + // ignoring other exiting blocks. This can overestimate the trip count + // if we exit through another exit, but can never underestimate it. 
+ // TODO: incorporate information from other exits + BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L); + if (!ExitingBranch) { + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to find exiting " + << "latch branch of required form in " << DbgLoop(L) + << "\n"); + return std::nullopt; + } + // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. uint64_t LoopWeight, ExitWeight; - if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) + if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) { + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to extract branch " + << "weights for " << DbgLoop(L) << "\n"); return std::nullopt; + } if (L->contains(ExitingBranch->getSuccessor(1))) std::swap(LoopWeight, ExitWeight); - if (!ExitWeight) + if (!ExitWeight) { // Don't have a way to return predicated infinite + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed because of zero exit " + << "probability for " << DbgLoop(L) << "\n"); return std::nullopt; - - OrigExitWeight = ExitWeight; + } // Estimated exit count is a ratio of the loop weight by the weight of the // edge exiting the loop, rounded to nearest. @@ -834,43 +859,102 @@ static std::optional getEstimatedTripCount(BranchInst *ExitingBranch, return std::numeric_limits::max(); // Estimated trip count is one plus estimated exit count. - return ExitCount + 1; + uint64_t TC = ExitCount + 1; + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Estimated trip count of " << TC + << " for " << DbgLoop(L) << "\n"); + return TC; } std::optional llvm::getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight) { - // Currently we take the estimate exit count only from the loop latch, - // ignoring other exiting blocks. This can overestimate the trip count - // if we exit through another exit, but can never underestimate it. 
- TODO: incorporate information from other exits - if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) { - uint64_t ExitWeight; - if (std::optional EstTripCount = - getEstimatedTripCount(LatchBranch, L, ExitWeight)) { - if (EstimatedLoopInvocationWeight) - *EstimatedLoopInvocationWeight = ExitWeight; - return *EstTripCount; - } + // If EstimatedLoopInvocationWeight, we do not support this loop if + // getExpectedExitLoopLatchBranch returns nullptr. + // + // FIXME: Also, this is a stop-gap solution for nested loops. It avoids + // mistaking LLVMLoopEstimatedTripCount metadata to be for an outer loop when + // it was created for an inner loop. The problem is that loop metadata is + // attached to the branch instruction in the loop latch block, but that can be + // shared by the loops. A solution is to attach loop metadata to loop headers + // instead, but that would be a large change to LLVM. + // + // Until that happens, we work around the problem as follows. + // getExpectedExitLoopLatchBranch (which also guards + // setLoopEstimatedTripCount) returns nullptr for a loop unless the loop has + // one latch and that latch has exactly two successors one of which is an exit + // from the loop. If the latch is shared by nested loops, then that condition + // might hold for the inner loop but cannot hold for the outer loop: + // - Because the latch is shared, it must have at least two successors: the + // inner loop header and the outer loop header, which is also an exit for + // the inner loop. That satisfies the condition for the inner loop. + // - To satisfy the condition for the outer loop, the latch must have a third + // successor that is an exit for the outer loop. But that violates the + // condition for both loops. + BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L); + if (!ExitingBranch) + return std::nullopt; + + // If requested, either compute *EstimatedLoopInvocationWeight or return + // nullopt if cannot. 
+ // + // TODO: Eventually, once all passes have migrated away from setting branch + // weights to indicate estimated trip counts, this function will drop the + // EstimatedLoopInvocationWeight parameter. + if (EstimatedLoopInvocationWeight) { + uint64_t LoopWeight = 0, ExitWeight = 0; // Inits expected to be unused. + if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) + return std::nullopt; + if (L->contains(ExitingBranch->getSuccessor(1))) + std::swap(LoopWeight, ExitWeight); + if (!ExitWeight) + return std::nullopt; + *EstimatedLoopInvocationWeight = ExitWeight; } - return std::nullopt; + + // Return the estimated trip count from metadata unless the metadata is + // missing or has no value. + if (auto TC = getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount)) { + LLVM_DEBUG(dbgs() << "getLoopEstimatedTripCount: " + << LLVMLoopEstimatedTripCount << " metadata has trip " + << "count of " << *TC << " for " << DbgLoop(L) << "\n"); + return TC; + } + + // Estimate the trip count from latch branch weights. + return estimateLoopTripCount(L); } -bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, - unsigned EstimatedloopInvocationWeight) { - // At the moment, we currently support changing the estimate trip count of - // the latch branch only. We could extend this API to manipulate estimated - // trip counts for any exit. +bool llvm::setLoopEstimatedTripCount( + Loop *L, unsigned EstimatedTripCount, + std::optional EstimatedloopInvocationWeight) { + // If EstimatedLoopInvocationWeight, we do not support this loop if + // getExpectedExitLoopLatchBranch returns nullptr. + // + // FIXME: See comments in getLoopEstimatedTripCount for why this is required + // here regardless of EstimatedLoopInvocationWeight. BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); if (!LatchBranch) return false; + // Set the metadata. 
+ addStringMetadataToLoop(L, LLVMLoopEstimatedTripCount, EstimatedTripCount); + + // At the moment, we currently support changing the estimated trip count in + // the latch branch's branch weights only. We could extend this API to + // manipulate estimated trip counts for any exit. + // + // TODO: Eventually, once all passes have migrated away from setting branch + // weights to indicate estimated trip counts, we will not set branch weights + // here at all. + if (!EstimatedloopInvocationWeight) + return true; + // Calculate taken and exit weights. unsigned LatchExitWeight = 0; unsigned BackedgeTakenWeight = 0; - if (EstimatedTripCount > 0) { - LatchExitWeight = EstimatedloopInvocationWeight; + if (EstimatedTripCount != 0) { + LatchExitWeight = *EstimatedloopInvocationWeight; BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll index fc459a376710d..f39c6bd4c0d0d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll @@ -22,11 +22,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_BODY]]: ; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF4:![0-9]+]] +; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]] ; CHECK-V1-IC1: [[SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], 
!prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32( @@ -40,19 +40,19 @@ define void @foo_i32(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF4:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; 
CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32( @@ -64,11 +64,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_BODY]]: ; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF6:![0-9]+]] ; CHECK-V2-IC1: [[SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i32( @@ -82,19 +82,19 @@ define void @foo_i32(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_BODY]]: ; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF6:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: 
br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF10:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -124,21 +124,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label 
%[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7]] +; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF9]] ; CHECK-V1-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] +; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP15:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]] +; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i8( @@ -150,21 +150,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof 
[[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF12]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 
[[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i8( @@ -176,21 +176,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10:![0-9]+]] +; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF13:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] +; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF5]] +; CHECK-V2-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF6]] ; CHECK-V2-IC1: 
[[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i8( @@ -202,21 +202,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF1]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], 
label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF17:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -244,13 +244,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V1-IC1: [[SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32_no_bw( @@ -262,21 +262,21 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: 
[[MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32_no_bw( @@ -286,13 +286,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], 
!llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V2-IC1: [[SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i32_no_bw( @@ -304,21 +304,21 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label 
%[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -341,74 +341,86 @@ for.cond.cleanup: ; preds = %for.body !0 = !{!"branch_weights", i32 1, i32 1023} ;. ; CHECK-V1-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} -; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK-V1-IC1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V1-IC1: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]} -; CHECK-V1-IC1: [[PROF7]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 16, i32 16} -; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[PROF11]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V1-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META2]]} -; CHECK-V1-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} +; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V1-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V1-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META2]], [[META8:![0-9]+]]} +; 
CHECK-V1-IC1: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]], [[META11:![0-9]+]]} +; CHECK-V1-IC1: [[META11]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1: [[PROF12]] = !{!"branch_weights", i32 16, i32 16} +; CHECK-V1-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META8]], [[META3]]} +; CHECK-V1-IC1: [[PROF14]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V1-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META3]]} +; CHECK-V1-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]]} ;. ; CHECK-V1-IC1-FORCE-EPI4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK-V1-IC1-FORCE-EPI4: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V1-IC1-FORCE-EPI4: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF4]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 4, i32 4} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF8]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META2]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 4, i32 28} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], 
[[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META2]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF6]] = !{!"branch_weights", i32 4, i32 4} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META8:![0-9]+]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF9]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]], [[META14:![0-9]+]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF15]] = !{!"branch_weights", i32 4, i32 28} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META8]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META2]]} ;. 
; CHECK-V2-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC1: [[PROF1]] = !{!"branch_weights", i32 1, i32 255} -; CHECK-V2-IC1: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK-V2-IC1: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK-V2-IC1: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V2-IC1: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V2-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} -; CHECK-V2-IC1: [[PROF8]] = !{!"branch_weights", i32 1, i32 63} -; CHECK-V2-IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V2-IC1: [[PROF11]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]], [[META3]]} -; CHECK-V2-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]} +; CHECK-V2-IC1: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 256} +; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V2-IC1: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]], [[META9:![0-9]+]]} +; CHECK-V2-IC1: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 63} +; CHECK-V2-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]], [[META12:![0-9]+]]} +; CHECK-V2-IC1: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 64} +; CHECK-V2-IC1: [[PROF13]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V2-IC1: [[PROF14]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-V2-IC1: [[LOOP15]] = 
distinct !{[[LOOP15]], [[META3]], [[META9]], [[META4]]} +; CHECK-V2-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META4]]} +; CHECK-V2-IC1: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]]} ;. ; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63} -; CHECK-V2-IC4: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK-V2-IC4: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK-V2-IC4: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V2-IC4: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V2-IC4: [[PROF5]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]} -; CHECK-V2-IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF12]] = !{!"branch_weights", i32 8, i32 56} -; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF14]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V2-IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]} -; CHECK-V2-IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]]} +; CHECK-V2-IC4: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 64} +; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V2-IC4: [[PROF7]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-V2-IC4: [[LOOP8]] = distinct !{[[LOOP8]], 
[[META3]], [[META9:![0-9]+]], [[META4]]} +; CHECK-V2-IC4: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V2-IC4: [[PROF10]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V2-IC4: [[PROF11]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]], [[META14:![0-9]+]]} +; CHECK-V2-IC4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 16} +; CHECK-V2-IC4: [[PROF15]] = !{!"branch_weights", i32 8, i32 56} +; CHECK-V2-IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META9]], [[META4]]} +; CHECK-V2-IC4: [[PROF17]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC4: [[LOOP19]] = distinct !{[[LOOP19]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[LOOP21]] = distinct !{[[LOOP21]], [[META4]], [[META3]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index d261827d4e111..439e1f181b5df 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -46,7 +46,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 99, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 ; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF6:![0-9]+]] ; CHECK: bb18: ; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]] @@ -55,7 +55,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 -; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: bb6: ; CHECK-NEXT: ret void ; @@ -96,10 +96,12 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ;. 
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23} -; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} +; CHECK: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 24} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]], [[META9:![0-9]+]]} +; CHECK: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} ;. diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index 7ae06953c5544..4445141549069 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -27,23 +27,23 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]: ; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] -; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]: ; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 -; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label 
%[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]: ; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] -; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC1_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC1_EPI4: [[LOOP]]: ; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF13:![0-9]+]], !llvm.loop [[LOOP14:![0-9]+]] ; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC1_EPI4: br label %[[EXIT]] ; MAINVF4IC1_EPI4: [[EXIT]]: @@ -70,23 +70,23 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], 
[[N_VEC]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]: ; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 -; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]: ; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] -; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC2_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC2_EPI4: [[LOOP]]: ; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label 
%[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]] ; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC2_EPI4: br label %[[EXIT]] ; MAINVF4IC2_EPI4: [[EXIT]]: @@ -120,28 +120,34 @@ exit: ; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} ; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} ; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307} -; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} ; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} -; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0} -; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} -; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]} +; MAINVF4IC1_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 308} +; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 0} +; MAINVF4IC1_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. 
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} ; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} ; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} ; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153} -; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} ; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} -; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7} -; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 4} -; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} -; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]} +; MAINVF4IC2_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 154} +; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7} +; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4} +; MAINVF4IC2_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. 
diff --git a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll new file mode 100644 index 0000000000000..3c0bc8a39ebeb --- /dev/null +++ b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll @@ -0,0 +1,61 @@ +; Test "llvm.loop.estimated_trip_count" validation + +; DEFINE: %{RUN} = opt -passes=verify %t -disable-output 2>&1 | \ +; DEFINE: FileCheck %s -allow-empty -check-prefix + +define void @test() { +entry: + br label %body +body: + br i1 0, label %body, label %exit, !llvm.loop !0 +exit: + ret void +} +!0 = distinct !{!0, !1} + +; GOOD-NOT: {{.}} + +; BAD-VALUE: Expected second operand to be an integer constant of type i32 or smaller +; BAD-VALUE-NEXT: !1 = !{!"llvm.loop.estimated_trip_count", + +; TOO-FEW: Expected two operands +; TOO-FEW-NEXT: !1 = !{!"llvm.loop.estimated_trip_count"} + +; TOO-MANY: Expected two operands +; TOO-MANY-NEXT: !1 = !{!"llvm.loop.estimated_trip_count", i32 5, i32 5} + +; No value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count"}' >> %t +; RUN: not %{RUN} TOO-FEW + +; i16 value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i16 5}' >> %t +; RUN: %{RUN} GOOD + +; i32 value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5}' >> %t +; RUN: %{RUN} GOOD + +; i64 value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i64 5}' >> %t +; RUN: not %{RUN} BAD-VALUE + +; MDString value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !"5"}' >> %t +; RUN: not %{RUN} BAD-VALUE + +; MDNode value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !2}' >> %t +; RUN: echo '!2 = !{i32 5}' >> %t +; RUN: not %{RUN} BAD-VALUE + +; Too many values. 
+; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5, i32 5}' >> %t +; RUN: not %{RUN} TOO-MANY diff --git a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp index c22a3582bee86..ce002e9239960 100644 --- a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp @@ -142,3 +142,56 @@ TEST(LoopUtils, IsKnownNonPositiveInLoopTest) { EXPECT_EQ(isKnownNonPositiveInLoop(ArgSCEV, L, SE), true); }); } + +// The inner and outer loop here share a latch. Because any loop metadata must +// be attached to that latch, loop metadata cannot distinguish between the two +// loops. Until that problem is solved (by moving loop metadata to loops' +// header blocks instead), getLoopEstimatedTripCount and +// setLoopEstimatedTripCount must refuse to operate on at least one of the two +// loops. They choose to reject the outer loop here because the latch does not +// exit it. +TEST(LoopUtils, nestedLoopSharedLatchEstimatedTripCount) { + LLVMContext C; + std::unique_ptr M = + parseIR(C, "declare i1 @f()\n" + "declare i1 @g()\n" + "define void @foo() {\n" + "entry:\n" + " br label %outer\n" + "outer:\n" + " %c0 = call i1 @f()" + " br i1 %c0, label %inner, label %exit, !prof !0\n" + "inner:\n" + " %c1 = call i1 @g()" + " br i1 %c1, label %inner, label %outer, !prof !1\n" + "exit:\n" + " ret void\n" + "}\n" + "!0 = !{!\"branch_weights\", i32 100, i32 1}\n" + "!1 = !{!\"branch_weights\", i32 4, i32 1}\n" + "\n"); + + run(*M, "foo", + [&](Function &F, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI) { + assert(LI.end() - LI.begin() == 1 && "Expected one outer loop"); + Loop *Outer = *LI.begin(); + assert(Outer->end() - Outer->begin() == 1 && "Expected one inner loop"); + Loop *Inner = *Outer->begin(); + + // Even before llvm.loop.estimated_trip_count is added to either loop, + // getLoopEstimatedTripCount rejects the outer loop. 
+ EXPECT_EQ(getLoopEstimatedTripCount(Inner), 5); + EXPECT_EQ(getLoopEstimatedTripCount(Outer), std::nullopt); + + // setLoopEstimatedTripCount for the inner loop does not affect + // getLoopEstimatedTripCount for the outer loop. + EXPECT_EQ(setLoopEstimatedTripCount(Inner, 100), true); + EXPECT_EQ(getLoopEstimatedTripCount(Inner), 100); + EXPECT_EQ(getLoopEstimatedTripCount(Outer), std::nullopt); + + // setLoopEstimatedTripCount rejects the outer loop. + EXPECT_EQ(setLoopEstimatedTripCount(Outer, 999), false); + EXPECT_EQ(getLoopEstimatedTripCount(Inner), 100); + EXPECT_EQ(getLoopEstimatedTripCount(Outer), std::nullopt); + }); +} From bd6e217ac6a3f3d483d6acfdbb40d5c20a79c16b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 15:55:54 -0400 Subject: [PATCH 021/734] [libc++] Remove concurrency in the right libc++ job Early-cancellation should have been removed from libcxx-run-benchmarks.yml in the first place, not libcxx-build-and-test.yaml. --- .github/workflows/libcxx-build-and-test.yaml | 4 ++++ .github/workflows/libcxx-run-benchmarks.yml | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index c46a8c3c2b8dc..2e6ff7f91b6fc 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -29,6 +29,10 @@ on: permissions: contents: read # Default everything to read-only +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + jobs: stage1: if: github.repository_owner == 'llvm' diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index 6ca49d0f04dc9..63763ba58e7d8 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -23,10 +23,6 @@ env: CXX: clang++-22 COMMENT_BODY: ${{ github.event.comment.body }} -concurrency: - group: ${{ 
github.workflow }}-${{ github.event.pull_request.number }} - cancel-in-progress: true - jobs: run-benchmarks: if: >- From 4ae520bfb4f058c747799c709691bbafc80619ab Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 11 Sep 2025 19:56:01 +0000 Subject: [PATCH 022/734] [cmake] Add missing shared library dependency after f3efbce --- llvm/lib/Target/MSP430/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/MSP430/CMakeLists.txt b/llvm/lib/Target/MSP430/CMakeLists.txt index 4081d3472fd78..bcf9fd288dbd1 100644 --- a/llvm/lib/Target/MSP430/CMakeLists.txt +++ b/llvm/lib/Target/MSP430/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_target(MSP430CodeGen SelectionDAG Support Target + TargetParser ADD_TO_COMPONENT MSP430 From 9eb17cc0343d09264ea875038901d1c6541dcef7 Mon Sep 17 00:00:00 2001 From: "Henrik G. Olsson" Date: Thu, 11 Sep 2025 12:58:59 -0700 Subject: [PATCH 023/734] [Utils] Add support for split-file to diff_test_updater (#157765) --- llvm/utils/lit/lit/DiffUpdater.py | 117 ++++++++++++++++-- llvm/utils/lit/lit/TestRunner.py | 2 +- .../tests/Inputs/diff-test-update/.gitignore | 8 ++ .../multiple-split-file-populated.in | 17 +++ .../diff-test-update/multiple-split-file.in | 13 ++ .../diff-test-update/multiple-split-file.out | 14 +++ .../single-split-file-no-expected.in | 6 + .../single-split-file-no-expected.out | 6 + .../single-split-file-populated.in | 7 ++ .../diff-test-update/single-split-file.in | 5 + .../diff-test-update/single-split-file.out | 6 + .../Inputs/diff-test-update/split-both.test | 11 ++ .../diff-test-update/split-c-comments.in | 6 + .../diff-test-update/split-c-comments.out | 6 + .../diff-test-update/split-whitespace.in | 6 + .../diff-test-update/split-whitespace.out | 6 + .../diff-test-update/unrelated-split.test | 11 ++ .../Inputs/pass-test-update/should_not_run.py | 2 +- llvm/utils/lit/tests/diff-test-update.py | 21 +++- llvm/utils/lit/tests/pass-test-update.py | 2 +- llvm/utils/update_any_test_checks.py | 2 +- 21 
files changed, 260 insertions(+), 14 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file-populated.in create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.in create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.out create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.in create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.out create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-populated.in create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.in create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.out create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/split-both.test create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.in create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.out create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.in create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.out create mode 100644 llvm/utils/lit/tests/Inputs/diff-test-update/unrelated-split.test diff --git a/llvm/utils/lit/lit/DiffUpdater.py b/llvm/utils/lit/lit/DiffUpdater.py index de0001a94f0ba..5bba2d70991df 100644 --- a/llvm/utils/lit/lit/DiffUpdater.py +++ b/llvm/utils/lit/lit/DiffUpdater.py @@ -1,37 +1,136 @@ import shutil +import os +import shlex """ This file provides the `diff_test_updater` function, which is invoked on failed RUN lines when lit is executed with --update-tests. It checks whether the failed command is `diff` and, if so, uses heuristics to determine which file is the checked-in reference file and which file is output from the test case. 
The heuristics are currently as follows: + - if exactly one file originates from the `split-file` command, that file is the reference file and the other is the output file - if exactly one file ends with ".expected" (common pattern in LLVM), that file is the reference file and the other is the output file - if exactly one file path contains ".tmp" (e.g. because it contains the expansion of "%t"), that file is the reference file and the other is the output file If the command matches one of these patterns the output file content is copied to the reference file to make the test pass. +If the reference file originated in `split-file`, the output file content is instead copied to the corresponding slice of the test file. Otherwise the test is ignored. Possible improvements: - Support stdin patterns like "my_binary %s | diff expected.txt" - - Scan RUN lines to see if a file is the source of output from a previous command. + - Scan RUN lines to see if a file is the source of output from a previous command (other than `split-file`). If it is then it is not a reference file that can be copied to, regardless of name, since the test will overwrite it anyways. - Only update the parts that need updating (based on the diff output). Could help avoid noisy updates when e.g. whitespace changes are ignored. 
""" -def get_source_and_target(a, b): +class NormalFileTarget: + def __init__(self, target): + self.target = target + + def copyFrom(self, source): + shutil.copy(source, self.target) + + def __str__(self): + return self.target + + +class SplitFileTarget: + def __init__(self, slice_start_idx, test_path, lines): + self.slice_start_idx = slice_start_idx + self.test_path = test_path + self.lines = lines + + def copyFrom(self, source): + lines_before = self.lines[: self.slice_start_idx + 1] + self.lines = self.lines[self.slice_start_idx + 1 :] + slice_end_idx = None + for i, l in enumerate(self.lines): + if SplitFileTarget._get_split_line_path(l) != None: + slice_end_idx = i + break + if slice_end_idx is not None: + lines_after = self.lines[slice_end_idx:] + else: + lines_after = [] + with open(source, "r") as f: + new_lines = lines_before + f.readlines() + lines_after + with open(self.test_path, "w") as f: + for l in new_lines: + f.write(l) + + def __str__(self): + return f"slice in {self.test_path}" + + @staticmethod + def get_target_dir(commands, test_path): + for cmd in commands: + split = shlex.split(cmd) + if "split-file" not in split: + continue + start_idx = split.index("split-file") + split = split[start_idx:] + if len(split) < 3: + continue + if split[1].strip() != test_path: + continue + return split[2].strip() + return None + + @staticmethod + def create(path, commands, test_path, target_dir): + filename = path.replace(target_dir, "") + if filename.startswith(os.sep): + filename = filename[len(os.sep) :] + with open(test_path, "r") as f: + lines = f.readlines() + for i, l in enumerate(lines): + p = SplitFileTarget._get_split_line_path(l) + if p == filename: + idx = i + break + else: + return None + return SplitFileTarget(idx, test_path, lines) + + @staticmethod + def _get_split_line_path(l): + if len(l) < 6: + return None + if l.startswith("//"): + l = l[2:] + else: + l = l[1:] + if l.startswith("--- "): + l = l[4:] + else: + return None + return l.rstrip() 
+ + +def get_source_and_target(a, b, test_path, commands): """ Try to figure out which file is the test output and which is the reference. """ + split_target_dir = SplitFileTarget.get_target_dir(commands, test_path) + if split_target_dir: + a_target = SplitFileTarget.create(a, commands, test_path, split_target_dir) + b_target = SplitFileTarget.create(b, commands, test_path, split_target_dir) + if a_target and b_target: + return None + if a_target: + return b, a_target + if b_target: + return a, b_target + expected_suffix = ".expected" if a.endswith(expected_suffix) and not b.endswith(expected_suffix): - return b, a + return b, NormalFileTarget(a) if b.endswith(expected_suffix) and not a.endswith(expected_suffix): - return a, b + return a, NormalFileTarget(b) tmp_substr = ".tmp" if tmp_substr in a and not tmp_substr in b: - return a, b + return a, NormalFileTarget(b) if tmp_substr in b and not tmp_substr in a: - return b, a + return b, NormalFileTarget(a) return None @@ -40,16 +139,16 @@ def filter_flags(args): return [arg for arg in args if not arg.startswith("-")] -def diff_test_updater(result, test): +def diff_test_updater(result, test, commands): args = filter_flags(result.command.args) if len(args) != 3: return None [cmd, a, b] = args if cmd != "diff": return None - res = get_source_and_target(a, b) + res = get_source_and_target(a, b, test.getFilePath(), commands) if not res: return f"update-diff-test: could not deduce source and target from {a} and {b}" source, target = res - shutil.copy(source, target) + target.copyFrom(source) return f"update-diff-test: copied {source} to {target}" diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 0e32838eea1cb..cecbae61a3d73 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -1241,7 +1241,7 @@ def executeScriptInternal( ): for test_updater in litConfig.test_updaters: try: - update_output = test_updater(result, test) + update_output = 
test_updater(result, test, commands) except Exception as e: output = out output += err diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/.gitignore b/llvm/utils/lit/tests/Inputs/diff-test-update/.gitignore index dd373bf9e0c66..aea8ee3be4982 100644 --- a/llvm/utils/lit/tests/Inputs/diff-test-update/.gitignore +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/.gitignore @@ -1,2 +1,10 @@ ; diff-tmp-dir.test clobbers this file empty.txt +; these test cases are clobbered when run, so they're recreated each time +single-split-file.test +single-split-file-populated.test +multiple-split-file.test +multiple-split-file-populated.test +single-split-file-no-expected.test +split-c-comments.test +split whitespace.test diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file-populated.in b/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file-populated.in new file mode 100644 index 0000000000000..e218ed6a0c6ea --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file-populated.in @@ -0,0 +1,17 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test3.expected %t/out.txt + +#--- test1.expected +unrelated +#--- test2.expected +#--- test3.expected +BAR + +BAZ + +#--- test4.expected +filler +#--- test5.expected + + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.in b/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.in new file mode 100644 index 0000000000000..c47db99912c24 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.in @@ -0,0 +1,13 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test3.expected %t/out.txt + +#--- test1.expected +unrelated +#--- test2.expected +#--- test3.expected +#--- test4.expected +filler +#--- test5.expected + + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.out b/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.out 
new file mode 100644 index 0000000000000..c1d2782d3c2d4 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/multiple-split-file.out @@ -0,0 +1,14 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test3.expected %t/out.txt + +#--- test1.expected +unrelated +#--- test2.expected +#--- test3.expected +FOO +#--- test4.expected +filler +#--- test5.expected + + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.in b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.in new file mode 100644 index 0000000000000..510dc7afba16b --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.in @@ -0,0 +1,6 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test.txt %t/out.txt + +#--- test.txt + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.out b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.out new file mode 100644 index 0000000000000..f52e3004aee15 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-no-expected.out @@ -0,0 +1,6 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test.txt %t/out.txt + +#--- test.txt +FOO diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-populated.in b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-populated.in new file mode 100644 index 0000000000000..63042cf9b86bc --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file-populated.in @@ -0,0 +1,7 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test.expected %t/out.txt + +#--- test.expected +BAR + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.in b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.in new file mode 100644 index 0000000000000..422ccf2ef6813 --- /dev/null +++ 
b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.in @@ -0,0 +1,5 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test.expected %t/out.txt + +#--- test.expected diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.out b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.out new file mode 100644 index 0000000000000..5552ad328ec5c --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/single-split-file.out @@ -0,0 +1,6 @@ +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/out.txt +# RUN: diff %t/test.expected %t/out.txt + +#--- test.expected +FOO diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/split-both.test b/llvm/utils/lit/tests/Inputs/diff-test-update/split-both.test new file mode 100644 index 0000000000000..f564f446cc94b --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/split-both.test @@ -0,0 +1,11 @@ +# RUN: split-file %s %t +# RUN: diff %t/split-both.expected %t/split-both.out + +# ignore the fact that it's called ".expected" +# when comparing two files originating in split-file + +#--- split-both.expected +FOO +#--- split-both.out +BAR + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.in b/llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.in new file mode 100644 index 0000000000000..3cda60118f5ba --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.in @@ -0,0 +1,6 @@ +// RUN: split-file %s %t +// RUN: cp %S/1.in %t/out.txt +// RUN: diff %t/test.txt %t/out.txt +// +//--- test.txt + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.out b/llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.out new file mode 100644 index 0000000000000..5020804f198b1 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/split-c-comments.out @@ -0,0 +1,6 @@ +// RUN: split-file %s %t +// RUN: cp %S/1.in %t/out.txt +// RUN: diff %t/test.txt %t/out.txt +// +//--- 
test.txt +FOO diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.in b/llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.in new file mode 100644 index 0000000000000..ad48d2ae4953c --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.in @@ -0,0 +1,6 @@ +// RUN: split-file "%s" "%t" +// RUN: cp %S/1.in "%t/out.txt" +// RUN: diff "%t/test.txt" "%t/out.txt" +// +//--- test.txt + diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.out b/llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.out new file mode 100644 index 0000000000000..cb28124101ac6 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/split-whitespace.out @@ -0,0 +1,6 @@ +// RUN: split-file "%s" "%t" +// RUN: cp %S/1.in "%t/out.txt" +// RUN: diff "%t/test.txt" "%t/out.txt" +// +//--- test.txt +FOO diff --git a/llvm/utils/lit/tests/Inputs/diff-test-update/unrelated-split.test b/llvm/utils/lit/tests/Inputs/diff-test-update/unrelated-split.test new file mode 100644 index 0000000000000..b04eff36721de --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/diff-test-update/unrelated-split.test @@ -0,0 +1,11 @@ +# the fact that this test runs split-file is unrelated +# to the diffed files + +# RUN: mkdir %t +# RUN: split-file %s %t +# RUN: cp %S/1.in %t/unrelated-split.expected +# RUN: cp %S/2.in %t/unrelated-split.txt +# RUN: diff %t/unrelated-split.expected %t/unrelated-split.txt + +#--- distraction.txt + diff --git a/llvm/utils/lit/tests/Inputs/pass-test-update/should_not_run.py b/llvm/utils/lit/tests/Inputs/pass-test-update/should_not_run.py index 0fda62c832f08..5b39d208a2ed6 100644 --- a/llvm/utils/lit/tests/Inputs/pass-test-update/should_not_run.py +++ b/llvm/utils/lit/tests/Inputs/pass-test-update/should_not_run.py @@ -1,2 +1,2 @@ -def should_not_run(foo, bar): +def should_not_run(foo, bar, baz): raise Exception("this test updater should only run on failure") diff --git 
a/llvm/utils/lit/tests/diff-test-update.py b/llvm/utils/lit/tests/diff-test-update.py index c37d0dccc727c..ad14034a85a17 100644 --- a/llvm/utils/lit/tests/diff-test-update.py +++ b/llvm/utils/lit/tests/diff-test-update.py @@ -1,10 +1,29 @@ +# RUN: cp %S/Inputs/diff-test-update/single-split-file.in %S/Inputs/diff-test-update/single-split-file.test +# RUN: cp %S/Inputs/diff-test-update/single-split-file-populated.in %S/Inputs/diff-test-update/single-split-file-populated.test +# RUN: cp %S/Inputs/diff-test-update/multiple-split-file.in %S/Inputs/diff-test-update/multiple-split-file.test +# RUN: cp %S/Inputs/diff-test-update/multiple-split-file-populated.in %S/Inputs/diff-test-update/multiple-split-file-populated.test +# RUN: cp %S/Inputs/diff-test-update/single-split-file-no-expected.in %S/Inputs/diff-test-update/single-split-file-no-expected.test +# RUN: cp %S/Inputs/diff-test-update/split-c-comments.in %S/Inputs/diff-test-update/split-c-comments.test +# RUN: cp %S/Inputs/diff-test-update/split-whitespace.in "%S/Inputs/diff-test-update/split whitespace.test" + # RUN: not %{lit} --update-tests -v %S/Inputs/diff-test-update | FileCheck %s +# RUN: diff %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file.test +# RUN: diff %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file-populated.test +# RUN: diff %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file.test +# RUN: diff %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file-populated.test +# RUN: diff %S/Inputs/diff-test-update/single-split-file-no-expected.out %S/Inputs/diff-test-update/single-split-file-no-expected.test +# RUN: diff %S/Inputs/diff-test-update/split-c-comments.out %S/Inputs/diff-test-update/split-c-comments.test +# RUN: diff %S/Inputs/diff-test-update/split-whitespace.out "%S/Inputs/diff-test-update/split whitespace.test" + + # CHECK: 
# update-diff-test: could not deduce source and target from {{.*}}1.in and {{.*}}2.in # CHECK: # update-diff-test: could not deduce source and target from {{.*}}1.txt and {{.*}}2.txt # CHECK: # update-diff-test: copied {{.*}}my-file.txt to {{.*}}my-file.expected # CHECK: # update-diff-test: copied {{.*}}1.txt to {{.*}}empty.txt # CHECK: # update-diff-test: copied {{.*}}diff-tmp.test.tmp.txt to {{.*}}diff-t-out.txt +# CHECK: # update-diff-test: could not deduce source and target from {{.*}}split-both.expected and {{.*}}split-both.out +# CHECK: # update-diff-test: copied {{.*}}unrelated-split.txt to {{.*}}unrelated-split.expected -# CHECK: Failed: 5 (100.00%) +# CHECK: Failed: 14 (100.00%) diff --git a/llvm/utils/lit/tests/pass-test-update.py b/llvm/utils/lit/tests/pass-test-update.py index 00a4025be660e..2e9f1be2bccab 100644 --- a/llvm/utils/lit/tests/pass-test-update.py +++ b/llvm/utils/lit/tests/pass-test-update.py @@ -12,7 +12,7 @@ # CHECK: Exception occurred in test updater: # CHECK: Traceback (most recent call last): # CHECK: File {{.*}}, line {{.*}}, in {{.*}} -# CHECK: update_output = test_updater(result, test) +# CHECK: update_output = test_updater(result, test, commands) # CHECK: File "{{.*}}{{/|\\}}should_not_run.py", line {{.*}}, in should_not_run # CHECK: raise Exception("this test updater should only run on failure") # CHECK: Exception: this test updater should only run on failure diff --git a/llvm/utils/update_any_test_checks.py b/llvm/utils/update_any_test_checks.py index 76fe336593929..ec277f140a34f 100755 --- a/llvm/utils/update_any_test_checks.py +++ b/llvm/utils/update_any_test_checks.py @@ -63,7 +63,7 @@ def expand_listfile_args(arg_list): return exp_arg_list -def utc_lit_plugin(result, test): +def utc_lit_plugin(result, test, commands): testname = test.getFilePath() if not testname: return None From 73e64e534f32f0223b5ddcf8d0f6ba7bc7e47fad Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 16:05:27 -0400 Subject: [PATCH 024/734] 
[libc++] Fix sed pattern to extract benchmarks from the comment body --- .github/workflows/libcxx-run-benchmarks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index 63763ba58e7d8..98fa016a8949e 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -48,13 +48,13 @@ jobs: - name: Run baseline run: | - BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -n 's/\/libcxx-bot benchmark (.+)/\1/p') + BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') baseline_commit=$(git merge-base refs/remotes/origin/${GITHUB_BASE_REF} ${GITHUB_SHA}) ./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${BENCHMARKS} - name: Run candidate run: | - BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -n 's/\/libcxx-bot benchmark (.+)/\1/p') + BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') ./libcxx/utils/test-at-commit --commit ${GITHUB_SHA} -B build/candidate -- -sv -j1 --param optimization=speed ${BENCHMARKS} - name: Compare baseline and candidate runs From 05a705efda4bf62f54eed1bcb97e212ae823c585 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Thu, 11 Sep 2025 13:10:33 -0700 Subject: [PATCH 025/734] [AMDGPU] Restrict to VGPR only for mfma scale operands (#158117) Restrict to VGPR only (VRegSrc_32) for mfma scale operands to workaround a hardware design defect: For all Inline/SGPR constants, SP HW use bits [30:23] as the scale. TODO: We may still be able to allow Inline Constants/SGPR, with a proper shift, to obtain a potentially better performance. 
Fixes: SWDEV-548629 --- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 9 +- ....amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll | 295 ++++-- ...m.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll | 883 ++++++++++++------ .../AMDGPU/mai-hazards-mfma-scale.gfx950.mir | 72 +- llvm/test/MC/AMDGPU/mai-gfx950-err.s | 48 + llvm/test/MC/AMDGPU/mai-gfx950.s | 68 -- .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 54 -- 7 files changed, 855 insertions(+), 574 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 6f778a0d262af..f7279b664ed27 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -966,9 +966,14 @@ class MAIInst : MAIInst { // Append operands from V_MFMA_LD_SCALE_B32, but we need to rename them. + // Restrict to VGPR only (VRegSrc_32) for the scale operands to workaround a + // hardware design defect: For all Inline/SGPR constants, SP HW use bits + // [30:23] as the scale. + // TODO: We may still be able to allow Inline Constants/SGPR, with a proper + // shift, to obtain a potentially better performance. 
let InOperandList = !con(BaseInst.InOperandList, - (ins VSrc_b32:$scale_src0, - VSrc_b32:$scale_src1, + (ins VRegSrc_32:$scale_src0, + VRegSrc_32:$scale_src1, op_sel0:$src0_modifiers, op_sel_hi0:$src1_modifiers)); let AsmOperands = diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll index f78ea92b4840b..17ae6dd23b199 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll @@ -1425,9 +1425,10 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a1, v17 ; GCN-NEXT: v_accvgpr_write_b32 a2, v18 ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: v_mov_b32_e32 v16, s1 +; GCN-NEXT: v_mov_b32_e32 v16, s0 +; GCN-NEXT: v_mov_b32_e32 v17, s1 ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1447,8 +1448,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a1, v17 ; GCN-NEXT: v_accvgpr_write_b32 a2, v18 ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 +; GCN-NEXT: v_mov_b32_e32 v16, s0 ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v20 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v20 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1468,8 +1470,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a1, v17 ; GCN-NEXT: v_accvgpr_write_b32 a2, v18 ; GCN-NEXT: 
v_accvgpr_write_b32 a3, v19 +; GCN-NEXT: v_mov_b32_e32 v16, s0 ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, s0 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v16 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1567,8 +1570,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v8, v12 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1592,8 +1596,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 +; GISEL-NEXT: v_mov_b32_e32 v8, s20 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v8, v12 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1621,8 +1626,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], 
v[14:21], v[0:7], a[0:3], v12, v8 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1646,8 +1652,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 +; GISEL-NEXT: v_mov_b32_e32 v8, s20 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, v8 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1675,8 +1682,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, v8 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1700,8 +1708,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 +; GISEL-NEXT: v_mov_b32_e32 v8, s20 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, v8 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1721,8 +1730,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; 
GCN-NEXT: v_accvgpr_write_b32 a1, s1 ; GCN-NEXT: v_accvgpr_write_b32 a2, s2 ; GCN-NEXT: v_accvgpr_write_b32 a3, s3 +; GCN-NEXT: v_mov_b32_e32 v17, s16 ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, s16 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1750,8 +1760,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a1, s21 ; SDAG-NEXT: v_accvgpr_write_b32 a2, s22 ; SDAG-NEXT: v_accvgpr_write_b32 a3, s23 +; SDAG-NEXT: v_mov_b32_e32 v9, s24 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, v9 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1775,8 +1786,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a1, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a2, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a3, s23 +; GISEL-NEXT: v_mov_b32_e32 v9, s24 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, v9 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1789,22 +1801,43 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp } define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: 
test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v20, -2 +; SDAG-NEXT: v_mov_b32_e32 v21, 33 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v21, v20 op_sel_hi:[1,1,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_mov_b32_e32 v16, 33 +; GISEL-NEXT: v_mov_b32_e32 v17, -2 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0] +; 
GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 2, i32 33, i32 2, i32 -2) ret <4 x float> %result } @@ -1813,13 +1846,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scaleB_inlineimm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_movk_i32 s0, 0x41 +; SDAG-NEXT: v_mov_b32_e32 v20, -2 +; SDAG-NEXT: v_mov_b32_e32 v21, 0x41 ; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v21, v20 op_sel_hi:[1,1,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1836,8 +1870,9 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 ; GISEL-NEXT: v_mov_b32_e32 v16, 0x41 +; GISEL-NEXT: v_mov_b32_e32 v17, -2 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0] +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1853,14 +1888,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; 
SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scaleB_kimm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_movk_i32 s0, 0x41 +; SDAG-NEXT: v_mov_b32_e32 v20, 0x4d +; SDAG-NEXT: v_mov_b32_e32 v21, 0x41 ; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 -; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v21, v20 op_sel_hi:[1,1,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 @@ -1919,9 +1954,10 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; SDAG-NEXT: v_mov_b32_e32 v17, s9 ; SDAG-NEXT: v_mov_b32_e32 v18, s10 ; SDAG-NEXT: v_mov_b32_e32 v19, s11 -; SDAG-NEXT: v_mov_b32_e32 v21, s13 +; SDAG-NEXT: v_mov_b32_e32 v21, s12 +; SDAG-NEXT: v_mov_b32_e32 v22, s13 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s12, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v21, v22 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[14:15] @@ -1942,9 +1978,10 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27] -; GISEL-NEXT: v_mov_b32_e32 v20, s29 +; GISEL-NEXT: v_mov_b32_e32 v20, s28 +; GISEL-NEXT: v_mov_b32_e32 v21, s29 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s28, v20 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 2 @@ -1960,8 +1997,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 -; SDAG-NEXT: s_movk_i32 s6, 0x41 -; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 +; SDAG-NEXT: v_mov_b32_e32 v21, -2 +; SDAG-NEXT: v_mov_b32_e32 v22, 0x41 ; SDAG-NEXT: v_mov_b32_e32 v20, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b32_e32 v0, s8 @@ -1983,18 +2021,19 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_mov_b32_e32 v15, s23 ; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v22, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5] +; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 ; GISEL-NEXT: v_mov_b32_e32 v20, 0x41 -; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 +; GISEL-NEXT: v_mov_b32_e32 v21, -2 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] @@ -2007,11 +2046,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; 
GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1] ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 2 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] +; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 65, i32 1, i32 -2) store <4 x float> %result, ptr addrspace(1) %ptr, align 16 @@ -2023,8 +2062,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 -; SDAG-NEXT: s_movk_i32 s6, 0x41 -; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 +; SDAG-NEXT: v_mov_b32_e32 v21, 1.0 +; SDAG-NEXT: v_mov_b32_e32 v22, 0x41 ; SDAG-NEXT: v_mov_b32_e32 v20, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b32_e32 v0, s8 @@ -2046,18 +2086,19 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_mov_b32_e32 v15, s23 ; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s6, 1.0 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v22, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5] +; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: 
test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__FP_literal: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 ; GISEL-NEXT: v_mov_b32_e32 v20, 0x41 -; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 +; GISEL-NEXT: v_mov_b32_e32 v21, 1.0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] @@ -2070,11 +2111,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1] ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, 1.0 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 2 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] +; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 65, i32 1, i32 1065353216) store <4 x float> %result, ptr addrspace(1) %ptr, align 16 @@ -2086,8 +2127,10 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 +; SDAG-NEXT: v_mov_b32_e32 v21, -2 +; SDAG-NEXT: v_mov_b32_e32 v22, 1.0 ; SDAG-NEXT: v_mov_b32_e32 v20, 0 -; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 @@ -2108,16 +2151,19 @@ define 
amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_mov_b32_e32 v15, s23 ; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v22, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5] +; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_FP_literal__scaleB__inline_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 +; GISEL-NEXT: v_mov_b32_e32 v20, 1.0 +; GISEL-NEXT: v_mov_b32_e32 v21, -2 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] @@ -2129,14 +2175,12 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1] -; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> 
@llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 1065353216, i32 1, i32 -2) store <4 x float> %result, ptr addrspace(1) %ptr, align 16 @@ -2148,8 +2192,10 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 +; SDAG-NEXT: v_mov_b32_e32 v21, 0.15915494 +; SDAG-NEXT: v_mov_b32_e32 v22, 1.0 ; SDAG-NEXT: v_mov_b32_e32 v20, 0 -; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 @@ -2170,16 +2216,19 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_mov_b32_e32 v15, s23 ; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, 0.15915494 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v22, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5] +; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_FP_literal__scaleB__FP_literal: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 +; GISEL-NEXT: v_mov_b32_e32 v20, 1.0 +; GISEL-NEXT: v_mov_b32_e32 v21, 0.15915494 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] @@ -2191,14 +2240,12 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL-NEXT: 
v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1] -; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, 0.15915494 op_sel:[1,1,0] op_sel_hi:[1,0,0] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 1065353216, i32 1, i32 1042479491) store <4 x float> %result, ptr addrspace(1) %ptr, align 16 @@ -2250,43 +2297,85 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_b( } define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_1(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_1: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 1 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: 
test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_1: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v20, 1 +; SDAG-NEXT: v_mov_b32_e32 v21, 0 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v21, v20 op_sel_hi:[0,0,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_1: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: v_mov_b32_e32 v17, 1 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1) ret <4 x float> %result } define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_1_0_a(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_1_0_a: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 1, 0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_1_0_a: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v20, 0 +; SDAG-NEXT: v_mov_b32_e32 v21, 1 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v21, v20 op_sel_hi:[0,0,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_1_0_a: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_mov_b32_e32 v16, 1 +; GISEL-NEXT: v_mov_b32_e32 v17, 0 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 
v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0) ret <4 x float> %result } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll index 24af3fa5ff9b7..839f0324227ca 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll @@ -3387,10 +3387,11 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a12, v28 ; GCN-NEXT: v_accvgpr_write_b32 a13, v29 ; GCN-NEXT: v_accvgpr_write_b32 a14, v30 -; GCN-NEXT: v_mov_b32_e32 v16, s1 +; GCN-NEXT: v_mov_b32_e32 v16, s0 +; GCN-NEXT: v_mov_b32_e32 v17, s1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, v17 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 @@ -3436,9 +3437,10 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a12, v28 ; GCN-NEXT: v_accvgpr_write_b32 a13, v29 ; GCN-NEXT: v_accvgpr_write_b32 a14, v30 +; GCN-NEXT: v_mov_b32_e32 v16, s0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, v31 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 @@ -3484,9 +3486,10 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; 
GCN-NEXT: v_accvgpr_write_b32 a12, v28 ; GCN-NEXT: v_accvgpr_write_b32 a13, v29 ; GCN-NEXT: v_accvgpr_write_b32 a14, v30 +; GCN-NEXT: v_mov_b32_e32 v16, s0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, s0 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v16 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 @@ -3659,8 +3662,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_accvgpr_write_b32 a13, v21 ; SDAG-NEXT: v_accvgpr_write_b32 a14, v22 ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v8, v24 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 @@ -3709,8 +3713,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: v_accvgpr_write_b32 a13, v21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, v22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 +; GISEL-NEXT: v_mov_b32_e32 v8, s20 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v8, v24 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 @@ -3763,8 +3768,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a13, v21 ; SDAG-NEXT: v_accvgpr_write_b32 a14, v22 ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], 
v24, s20 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, v8 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 @@ -3813,8 +3819,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a13, v21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, v22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 +; GISEL-NEXT: v_mov_b32_e32 v8, s20 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, s20 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, v8 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 @@ -3867,8 +3874,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a13, v21 ; SDAG-NEXT: v_accvgpr_write_b32 a14, v22 ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, v8 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 @@ -3917,8 +3925,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a13, v21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, v22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 +; GISEL-NEXT: v_mov_b32_e32 v8, s20 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, v8 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 @@ -3963,8 +3972,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; GCN-NEXT: v_accvgpr_write_b32 a13, s25 ; GCN-NEXT: v_accvgpr_write_b32 a14, s26 ; GCN-NEXT: v_accvgpr_write_b32 a15, s27 +; GCN-NEXT: v_mov_b32_e32 v17, s28 ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, s28 op_sel_hi:[0,0,0] +; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, v17 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 3 @@ -4114,48 +4124,95 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp } define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: scratch_load_dword a15, off, s32 -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: v_accvgpr_write_b32 a4, v20 -; GCN-NEXT: v_accvgpr_write_b32 a5, v21 -; GCN-NEXT: v_accvgpr_write_b32 a6, v22 -; GCN-NEXT: v_accvgpr_write_b32 a7, v23 -; GCN-NEXT: v_accvgpr_write_b32 a8, v24 -; GCN-NEXT: v_accvgpr_write_b32 a9, v25 -; GCN-NEXT: v_accvgpr_write_b32 a10, v26 -; GCN-NEXT: v_accvgpr_write_b32 a11, v27 -; GCN-NEXT: v_accvgpr_write_b32 a12, v28 -; GCN-NEXT: v_accvgpr_write_b32 a13, v29 -; GCN-NEXT: v_accvgpr_write_b32 a14, v30 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 33, -2 op_sel_hi:[1,1,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 
v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: scratch_load_dword a15, off, s32 +; SDAG-NEXT: v_mov_b32_e32 v31, -2 +; SDAG-NEXT: v_mov_b32_e32 v32, 33 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v27 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v28 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v29 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[1,1,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; 
SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__scaleB_inlineimm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: scratch_load_dword a15, off, s32 +; GISEL-NEXT: v_mov_b32_e32 v31, 33 +; GISEL-NEXT: v_mov_b32_e32 v32, -2 +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_accvgpr_write_b32 a4, v20 +; GISEL-NEXT: v_accvgpr_write_b32 a5, v21 +; GISEL-NEXT: v_accvgpr_write_b32 a6, v22 +; GISEL-NEXT: v_accvgpr_write_b32 a7, v23 +; GISEL-NEXT: v_accvgpr_write_b32 a8, v24 +; GISEL-NEXT: v_accvgpr_write_b32 a9, v25 +; GISEL-NEXT: v_accvgpr_write_b32 a10, v26 +; GISEL-NEXT: v_accvgpr_write_b32 a11, v27 +; GISEL-NEXT: v_accvgpr_write_b32 a12, v28 +; GISEL-NEXT: v_accvgpr_write_b32 a13, v29 +; GISEL-NEXT: v_accvgpr_write_b32 a14, v30 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[1,1,0] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: v_accvgpr_read_b32 v4, a4 +; GISEL-NEXT: v_accvgpr_read_b32 v5, a5 +; GISEL-NEXT: v_accvgpr_read_b32 v6, a6 +; GISEL-NEXT: v_accvgpr_read_b32 v7, a7 +; GISEL-NEXT: v_accvgpr_read_b32 v8, a8 +; GISEL-NEXT: v_accvgpr_read_b32 v9, a9 +; GISEL-NEXT: v_accvgpr_read_b32 v10, a10 +; 
GISEL-NEXT: v_accvgpr_read_b32 v11, a11 +; GISEL-NEXT: v_accvgpr_read_b32 v12, a12 +; GISEL-NEXT: v_accvgpr_read_b32 v13, a13 +; GISEL-NEXT: v_accvgpr_read_b32 v14, a14 +; GISEL-NEXT: v_accvgpr_read_b32 v15, a15 +; GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 2, i32 33, i32 2, i32 -2) ret <16 x float> %result } @@ -4165,7 +4222,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-NEXT: scratch_load_dword a15, off, s32 -; SDAG-NEXT: s_movk_i32 s0, 0x41 +; SDAG-NEXT: v_mov_b32_e32 v31, -2 +; SDAG-NEXT: v_mov_b32_e32 v32, 0x41 ; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 @@ -4183,7 +4241,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, -2 op_sel_hi:[1,1,0] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[1,1,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 @@ -4210,6 +4268,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: scratch_load_dword a15, off, s32 ; GISEL-NEXT: v_mov_b32_e32 v31, 0x41 +; GISEL-NEXT: v_mov_b32_e32 v32, -2 ; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 @@ -4227,7 +4286,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_accvgpr_write_b32 a14, v30 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; 
GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, -2 op_sel_hi:[1,1,0] +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[1,1,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 @@ -4257,7 +4316,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-NEXT: scratch_load_dword a15, off, s32 -; SDAG-NEXT: s_movk_i32 s0, 0x41 +; SDAG-NEXT: v_mov_b32_e32 v31, 1.0 +; SDAG-NEXT: v_mov_b32_e32 v32, 0x41 ; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 @@ -4275,7 +4335,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, 1.0 op_sel_hi:[1,1,0] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[1,1,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 @@ -4302,6 +4362,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: scratch_load_dword a15, off, s32 ; GISEL-NEXT: v_mov_b32_e32 v31, 0x41 +; GISEL-NEXT: v_mov_b32_e32 v32, 1.0 ; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 @@ -4319,7 +4380,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_accvgpr_write_b32 a14, v30 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, 1.0 op_sel_hi:[1,1,0] +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], 
v[8:15], a[0:15], v31, v32 op_sel_hi:[1,1,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 3 @@ -4345,106 +4406,12 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale } define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_inlineimm(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_inlineimm: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: scratch_load_dword a15, off, s32 -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: v_accvgpr_write_b32 a4, v20 -; GCN-NEXT: v_accvgpr_write_b32 a5, v21 -; GCN-NEXT: v_accvgpr_write_b32 a6, v22 -; GCN-NEXT: v_accvgpr_write_b32 a7, v23 -; GCN-NEXT: v_accvgpr_write_b32 a8, v24 -; GCN-NEXT: v_accvgpr_write_b32 a9, v25 -; GCN-NEXT: v_accvgpr_write_b32 a10, v26 -; GCN-NEXT: v_accvgpr_write_b32 a11, v27 -; GCN-NEXT: v_accvgpr_write_b32 a12, v28 -; GCN-NEXT: v_accvgpr_write_b32 a13, v29 -; GCN-NEXT: v_accvgpr_write_b32 a14, v30 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 1.0, -2 op_sel_hi:[1,1,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, 
a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] - %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 2, i32 1065353216, i32 2, i32 -2) - ret <16 x float> %result -} - -define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_FP_literal(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_FP_literal: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: scratch_load_dword a15, off, s32 -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: v_accvgpr_write_b32 a4, v20 -; GCN-NEXT: v_accvgpr_write_b32 a5, v21 -; GCN-NEXT: v_accvgpr_write_b32 a6, v22 -; GCN-NEXT: v_accvgpr_write_b32 a7, v23 -; GCN-NEXT: v_accvgpr_write_b32 a8, v24 -; GCN-NEXT: v_accvgpr_write_b32 a9, v25 -; GCN-NEXT: v_accvgpr_write_b32 a10, v26 -; GCN-NEXT: v_accvgpr_write_b32 a11, v27 -; GCN-NEXT: v_accvgpr_write_b32 a12, v28 -; GCN-NEXT: v_accvgpr_write_b32 a13, v29 -; GCN-NEXT: v_accvgpr_write_b32 a14, v30 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 0.15915494, 1.0 op_sel_hi:[1,1,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: 
v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] - %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 2, i32 1042479491, i32 2, i32 1065353216) - ret <16 x float> %result -} - -define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scaleB_kimm(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { -; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scaleB_kimm: +; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_inlineimm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-NEXT: scratch_load_dword a15, off, s32 -; SDAG-NEXT: s_movk_i32 s0, 0x41 -; SDAG-NEXT: v_mov_b32_e32 v31, 0x4d +; SDAG-NEXT: v_mov_b32_e32 v31, -2 +; SDAG-NEXT: v_mov_b32_e32 v32, 1.0 ; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 @@ -4462,7 +4429,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[1,1,0] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[1,1,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 3 @@ -4484,12 +4451,12 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scaleB_kimm: +; GISEL-LABEL: 
test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_inlineimm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: scratch_load_dword a15, off, s32 -; GISEL-NEXT: v_mov_b32_e32 v31, 0x41 -; GISEL-NEXT: v_mov_b32_e32 v32, 0x4d +; GISEL-NEXT: v_mov_b32_e32 v31, 1.0 +; GISEL-NEXT: v_mov_b32_e32 v32, -2 ; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 @@ -4528,44 +4495,233 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_accvgpr_read_b32 v14, a14 ; GISEL-NEXT: v_accvgpr_read_b32 v15, a15 ; GISEL-NEXT: s_setpc_b64 s[30:31] - %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 2, i32 65, i32 2, i32 77) + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 2, i32 1065353216, i32 2, i32 -2) ret <16 x float> %result } -define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1, ptr addrspace(1) %ptr) #0 { -; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd: +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_FP_literal(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { +; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_FP_literal: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 -; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 -; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80 -; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; SDAG-NEXT: v_mov_b32_e32 v16, s8 -; SDAG-NEXT: v_mov_b32_e32 v17, s9 -; SDAG-NEXT: v_mov_b32_e32 v18, s10 -; SDAG-NEXT: 
v_mov_b32_e32 v19, s11 -; SDAG-NEXT: v_mov_b32_e32 v20, s12 -; SDAG-NEXT: v_mov_b32_e32 v21, s13 -; SDAG-NEXT: v_mov_b32_e32 v22, s14 -; SDAG-NEXT: v_mov_b32_e32 v23, s15 -; SDAG-NEXT: v_mov_b32_e32 v24, s16 -; SDAG-NEXT: v_mov_b32_e32 v25, s17 -; SDAG-NEXT: v_mov_b32_e32 v26, s18 -; SDAG-NEXT: v_mov_b32_e32 v27, s19 -; SDAG-NEXT: v_mov_b32_e32 v28, s20 -; SDAG-NEXT: v_mov_b32_e32 v29, s21 -; SDAG-NEXT: v_mov_b32_e32 v30, s22 -; SDAG-NEXT: v_mov_b32_e32 v31, s23 -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[38:39] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[40:41] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[42:43] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: scratch_load_dword a15, off, s32 +; SDAG-NEXT: v_mov_b32_e32 v31, 1.0 +; SDAG-NEXT: v_mov_b32_e32 v32, 0.15915494 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v27 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v28 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v29 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[1,1,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: 
v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_FP_literal__scaleB_FP_literal: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: scratch_load_dword a15, off, s32 +; GISEL-NEXT: v_mov_b32_e32 v31, 0.15915494 +; GISEL-NEXT: v_mov_b32_e32 v32, 1.0 +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_accvgpr_write_b32 a4, v20 +; GISEL-NEXT: v_accvgpr_write_b32 a5, v21 +; GISEL-NEXT: v_accvgpr_write_b32 a6, v22 +; GISEL-NEXT: v_accvgpr_write_b32 a7, v23 +; GISEL-NEXT: v_accvgpr_write_b32 a8, v24 +; GISEL-NEXT: v_accvgpr_write_b32 a9, v25 +; GISEL-NEXT: v_accvgpr_write_b32 a10, v26 +; GISEL-NEXT: v_accvgpr_write_b32 a11, v27 +; GISEL-NEXT: v_accvgpr_write_b32 a12, v28 +; GISEL-NEXT: v_accvgpr_write_b32 a13, v29 +; GISEL-NEXT: v_accvgpr_write_b32 a14, v30 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[1,1,0] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: v_accvgpr_read_b32 v4, a4 +; GISEL-NEXT: v_accvgpr_read_b32 v5, a5 +; GISEL-NEXT: v_accvgpr_read_b32 v6, a6 +; GISEL-NEXT: v_accvgpr_read_b32 v7, a7 +; GISEL-NEXT: v_accvgpr_read_b32 v8, a8 +; GISEL-NEXT: v_accvgpr_read_b32 v9, a9 +; 
GISEL-NEXT: v_accvgpr_read_b32 v10, a10 +; GISEL-NEXT: v_accvgpr_read_b32 v11, a11 +; GISEL-NEXT: v_accvgpr_read_b32 v12, a12 +; GISEL-NEXT: v_accvgpr_read_b32 v13, a13 +; GISEL-NEXT: v_accvgpr_read_b32 v14, a14 +; GISEL-NEXT: v_accvgpr_read_b32 v15, a15 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 2, i32 1042479491, i32 2, i32 1065353216) + ret <16 x float> %result +} + +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scaleB_kimm(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { +; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scaleB_kimm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: scratch_load_dword a15, off, s32 +; SDAG-NEXT: v_mov_b32_e32 v31, 0x4d +; SDAG-NEXT: v_mov_b32_e32 v32, 0x41 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v27 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v28 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v29 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[1,1,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; 
SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scaleB_kimm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: scratch_load_dword a15, off, s32 +; GISEL-NEXT: v_mov_b32_e32 v31, 0x41 +; GISEL-NEXT: v_mov_b32_e32 v32, 0x4d +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_accvgpr_write_b32 a4, v20 +; GISEL-NEXT: v_accvgpr_write_b32 a5, v21 +; GISEL-NEXT: v_accvgpr_write_b32 a6, v22 +; GISEL-NEXT: v_accvgpr_write_b32 a7, v23 +; GISEL-NEXT: v_accvgpr_write_b32 a8, v24 +; GISEL-NEXT: v_accvgpr_write_b32 a9, v25 +; GISEL-NEXT: v_accvgpr_write_b32 a10, v26 +; GISEL-NEXT: v_accvgpr_write_b32 a11, v27 +; GISEL-NEXT: v_accvgpr_write_b32 a12, v28 +; GISEL-NEXT: v_accvgpr_write_b32 a13, v29 +; GISEL-NEXT: v_accvgpr_write_b32 a14, v30 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[1,1,0] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: v_accvgpr_read_b32 v4, a4 +; GISEL-NEXT: v_accvgpr_read_b32 v5, a5 +; GISEL-NEXT: v_accvgpr_read_b32 v6, a6 +; GISEL-NEXT: 
v_accvgpr_read_b32 v7, a7 +; GISEL-NEXT: v_accvgpr_read_b32 v8, a8 +; GISEL-NEXT: v_accvgpr_read_b32 v9, a9 +; GISEL-NEXT: v_accvgpr_read_b32 v10, a10 +; GISEL-NEXT: v_accvgpr_read_b32 v11, a11 +; GISEL-NEXT: v_accvgpr_read_b32 v12, a12 +; GISEL-NEXT: v_accvgpr_read_b32 v13, a13 +; GISEL-NEXT: v_accvgpr_read_b32 v14, a14 +; GISEL-NEXT: v_accvgpr_read_b32 v15, a15 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 2, i32 65, i32 2, i32 77) + ret <16 x float> %result +} + +define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1, ptr addrspace(1) %ptr) #0 { +; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 +; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[36:37] +; SDAG-NEXT: v_mov_b32_e32 v16, s8 +; SDAG-NEXT: v_mov_b32_e32 v17, s9 +; SDAG-NEXT: v_mov_b32_e32 v18, s10 +; SDAG-NEXT: v_mov_b32_e32 v19, s11 +; SDAG-NEXT: v_mov_b32_e32 v20, s12 +; SDAG-NEXT: v_mov_b32_e32 v21, s13 +; SDAG-NEXT: v_mov_b32_e32 v22, s14 +; SDAG-NEXT: v_mov_b32_e32 v23, s15 +; SDAG-NEXT: v_mov_b32_e32 v24, s16 +; SDAG-NEXT: v_mov_b32_e32 v25, s17 +; SDAG-NEXT: v_mov_b32_e32 v26, s18 +; SDAG-NEXT: v_mov_b32_e32 v27, s19 +; SDAG-NEXT: v_mov_b32_e32 v28, s20 +; SDAG-NEXT: v_mov_b32_e32 v29, s21 +; SDAG-NEXT: v_mov_b32_e32 v30, s22 +; SDAG-NEXT: v_mov_b32_e32 v31, s23 +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[38:39] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[40:41] +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[44:45] ; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; SDAG-NEXT: v_mov_b64_e32 
v[14:15], s[50:51] -; SDAG-NEXT: v_mov_b32_e32 v32, s1 +; SDAG-NEXT: v_mov_b32_e32 v32, s0 +; SDAG-NEXT: v_mov_b32_e32 v33, s1 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], s0, v32 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v32, v33 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 @@ -4598,9 +4754,10 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] -; GISEL-NEXT: v_mov_b32_e32 v32, s1 +; GISEL-NEXT: v_mov_b32_e32 v32, s0 +; GISEL-NEXT: v_mov_b32_e32 v33, s1 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], s0, v32 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v32, v33 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 @@ -4620,7 +4777,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 -; SDAG-NEXT: s_movk_i32 s2, 0x41 +; SDAG-NEXT: v_mov_b32_e32 v32, -2 +; SDAG-NEXT: v_mov_b32_e32 v33, 0x41 ; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b32_e32 v16, s8 @@ -4648,7 +4806,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[50:51] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], s2, -2 op_sel:[1,1,0] 
op_sel_hi:[1,0,0] blgp:2 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v33, v32 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 @@ -4664,6 +4822,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 ; GISEL-NEXT: v_mov_b32_e32 v32, 0x41 +; GISEL-NEXT: v_mov_b32_e32 v33, -2 ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[14:15] @@ -4683,7 +4842,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v32, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v32, v33 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 @@ -4738,9 +4897,10 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__nonmac(<8 x ; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 ; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 ; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 -; SDAG-NEXT: v_mov_b32_e32 v0, s1 +; SDAG-NEXT: v_mov_b32_e32 v0, s0 +; SDAG-NEXT: v_mov_b32_e32 v1, s1 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[2:9], v[10:17], a[0:15], s0, v0 op_sel_hi:[0,0,0] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[2:9], v[10:17], a[0:15], v0, v1 op_sel_hi:[0,0,0] ; SDAG-NEXT: v_mov_b32_e32 v2, s20 ; SDAG-NEXT: v_mov_b32_e32 v3, s21 ; SDAG-NEXT: v_mov_b32_e32 v4, s22 @@ -4811,10 +4971,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__nonmac(<8 x ; 
GISEL-NEXT: v_accvgpr_write_b32 a13, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 -; GISEL-NEXT: v_mov_b32_e32 v20, s1 +; GISEL-NEXT: v_mov_b32_e32 v20, s0 +; GISEL-NEXT: v_mov_b32_e32 v21, s1 ; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 ; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v20 op_sel_hi:[0,0,0] +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v20, v21 op_sel_hi:[0,0,0] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] @@ -4852,24 +5013,26 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx16 s[12:27], s[4:5], 0x0 +; SDAG-NEXT: v_mov_b32_e32 v0, 42 +; SDAG-NEXT: v_mov_b32_e32 v1, 25 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s12 -; SDAG-NEXT: v_mov_b32_e32 v1, s13 -; SDAG-NEXT: v_mov_b32_e32 v2, s14 -; SDAG-NEXT: v_mov_b32_e32 v3, s15 -; SDAG-NEXT: v_mov_b32_e32 v4, s16 -; SDAG-NEXT: v_mov_b32_e32 v5, s17 -; SDAG-NEXT: v_mov_b32_e32 v6, s18 -; SDAG-NEXT: v_mov_b32_e32 v7, s19 -; SDAG-NEXT: v_mov_b32_e32 v8, s20 -; SDAG-NEXT: v_mov_b32_e32 v9, s21 -; SDAG-NEXT: v_mov_b32_e32 v10, s22 -; SDAG-NEXT: v_mov_b32_e32 v11, s23 +; SDAG-NEXT: v_mov_b32_e32 v2, s12 +; SDAG-NEXT: v_mov_b32_e32 v3, s13 +; SDAG-NEXT: v_mov_b32_e32 v4, s14 +; SDAG-NEXT: v_mov_b32_e32 v5, s15 +; SDAG-NEXT: v_mov_b32_e32 v6, s16 +; SDAG-NEXT: v_mov_b32_e32 v7, s17 +; SDAG-NEXT: v_mov_b32_e32 v8, s18 +; SDAG-NEXT: v_mov_b32_e32 v9, s19 +; SDAG-NEXT: v_mov_b32_e32 v10, s20 +; SDAG-NEXT: v_mov_b32_e32 v11, s21 +; SDAG-NEXT: v_mov_b32_e32 v12, s22 +; SDAG-NEXT: v_mov_b32_e32 v13, s23 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 -; SDAG-NEXT: v_mov_b32_e32 v12, s24 -; SDAG-NEXT: v_mov_b32_e32 v13, s25 -; 
SDAG-NEXT: v_mov_b32_e32 v14, s26 -; SDAG-NEXT: v_mov_b32_e32 v15, s27 +; SDAG-NEXT: v_mov_b32_e32 v14, s24 +; SDAG-NEXT: v_mov_b32_e32 v15, s25 +; SDAG-NEXT: v_mov_b32_e32 v16, s26 +; SDAG-NEXT: v_mov_b32_e32 v17, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 @@ -4888,7 +5051,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 ; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[2:9], v[10:17], a[0:15], v1, v0 op_sel_hi:[0,0,0] blgp:2 ; SDAG-NEXT: v_mov_b32_e32 v2, s20 ; SDAG-NEXT: v_mov_b32_e32 v3, s21 ; SDAG-NEXT: v_mov_b32_e32 v4, s22 @@ -4931,9 +5094,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 +; GISEL-NEXT: v_mov_b32_e32 v20, 25 +; GISEL-NEXT: v_mov_b32_e32 v21, 42 ; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 -; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] @@ -4959,14 +5122,15 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 ; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v20, v21 op_sel_hi:[0,0,0] blgp:2 ; 
GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -4978,7 +5142,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 @@ -5123,6 +5287,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_nonmac: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx16 s[12:27], s[4:5], 0x0 +; SDAG-NEXT: v_mov_b32_e32 v32, 42 +; SDAG-NEXT: v_mov_b32_e32 v33, 25 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b32_e32 v16, s12 ; SDAG-NEXT: v_mov_b32_e32 v17, s13 @@ -5151,7 +5317,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v33, v32 op_sel_hi:[0,0,0] blgp:2 ; SDAG-NEXT: v_mov_b32_e32 v16, s20 ; SDAG-NEXT: v_mov_b32_e32 v17, s21 ; SDAG-NEXT: v_mov_b32_e32 v18, s22 @@ -5195,9 +5361,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 
0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 -; GISEL-NEXT: v_mov_b64_e32 v[32:33], 0 +; GISEL-NEXT: v_mov_b32_e32 v32, 25 +; GISEL-NEXT: v_mov_b32_e32 v33, 42 ; GISEL-NEXT: v_mov_b64_e32 v[34:35], 16 -; GISEL-NEXT: v_mov_b64_e32 v[36:37], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[36:37] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[38:39] @@ -5215,10 +5381,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[36:37], 32 ; GISEL-NEXT: v_mov_b64_e32 v[38:39], 48 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v32, v33 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[32:33], 0 ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] ; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[18:19] @@ -5234,7 +5401,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[38:39], v[28:31], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v[32:33], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[34:35], v[4:7], off sc0 sc1 @@ -5345,95 +5512,189 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_0_b( } define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_1(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_1: -; GCN: ; 
%bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: scratch_load_dword a15, off, s32 -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: v_accvgpr_write_b32 a4, v20 -; GCN-NEXT: v_accvgpr_write_b32 a5, v21 -; GCN-NEXT: v_accvgpr_write_b32 a6, v22 -; GCN-NEXT: v_accvgpr_write_b32 a7, v23 -; GCN-NEXT: v_accvgpr_write_b32 a8, v24 -; GCN-NEXT: v_accvgpr_write_b32 a9, v25 -; GCN-NEXT: v_accvgpr_write_b32 a10, v26 -; GCN-NEXT: v_accvgpr_write_b32 a11, v27 -; GCN-NEXT: v_accvgpr_write_b32 a12, v28 -; GCN-NEXT: v_accvgpr_write_b32 a13, v29 -; GCN-NEXT: v_accvgpr_write_b32 a14, v30 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 0, 1 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_1: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: scratch_load_dword a15, off, s32 +; SDAG-NEXT: v_mov_b32_e32 v31, 1 +; SDAG-NEXT: v_mov_b32_e32 v32, 0 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, 
v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v27 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v28 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v29 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_1: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: scratch_load_dword a15, off, s32 +; GISEL-NEXT: v_mov_b32_e32 v31, 0 +; GISEL-NEXT: v_mov_b32_e32 v32, 1 +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_accvgpr_write_b32 a4, v20 +; GISEL-NEXT: v_accvgpr_write_b32 a5, v21 +; GISEL-NEXT: v_accvgpr_write_b32 a6, 
v22 +; GISEL-NEXT: v_accvgpr_write_b32 a7, v23 +; GISEL-NEXT: v_accvgpr_write_b32 a8, v24 +; GISEL-NEXT: v_accvgpr_write_b32 a9, v25 +; GISEL-NEXT: v_accvgpr_write_b32 a10, v26 +; GISEL-NEXT: v_accvgpr_write_b32 a11, v27 +; GISEL-NEXT: v_accvgpr_write_b32 a12, v28 +; GISEL-NEXT: v_accvgpr_write_b32 a13, v29 +; GISEL-NEXT: v_accvgpr_write_b32 a14, v30 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: v_accvgpr_read_b32 v4, a4 +; GISEL-NEXT: v_accvgpr_read_b32 v5, a5 +; GISEL-NEXT: v_accvgpr_read_b32 v6, a6 +; GISEL-NEXT: v_accvgpr_read_b32 v7, a7 +; GISEL-NEXT: v_accvgpr_read_b32 v8, a8 +; GISEL-NEXT: v_accvgpr_read_b32 v9, a9 +; GISEL-NEXT: v_accvgpr_read_b32 v10, a10 +; GISEL-NEXT: v_accvgpr_read_b32 v11, a11 +; GISEL-NEXT: v_accvgpr_read_b32 v12, a12 +; GISEL-NEXT: v_accvgpr_read_b32 v13, a13 +; GISEL-NEXT: v_accvgpr_read_b32 v14, a14 +; GISEL-NEXT: v_accvgpr_read_b32 v15, a15 +; GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1) ret <16 x float> %result } define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_1_0_a(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { -; GCN-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_1_0_a: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: scratch_load_dword a15, off, s32 -; GCN-NEXT: v_accvgpr_write_b32 a0, v16 -; GCN-NEXT: v_accvgpr_write_b32 a1, v17 -; GCN-NEXT: v_accvgpr_write_b32 a2, v18 -; GCN-NEXT: 
v_accvgpr_write_b32 a3, v19 -; GCN-NEXT: v_accvgpr_write_b32 a4, v20 -; GCN-NEXT: v_accvgpr_write_b32 a5, v21 -; GCN-NEXT: v_accvgpr_write_b32 a6, v22 -; GCN-NEXT: v_accvgpr_write_b32 a7, v23 -; GCN-NEXT: v_accvgpr_write_b32 a8, v24 -; GCN-NEXT: v_accvgpr_write_b32 a9, v25 -; GCN-NEXT: v_accvgpr_write_b32 a10, v26 -; GCN-NEXT: v_accvgpr_write_b32 a11, v27 -; GCN-NEXT: v_accvgpr_write_b32 a12, v28 -; GCN-NEXT: v_accvgpr_write_b32 a13, v29 -; GCN-NEXT: v_accvgpr_write_b32 a14, v30 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 1, 0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_1_0_a: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: scratch_load_dword a15, off, s32 +; SDAG-NEXT: v_mov_b32_e32 v31, 0 +; SDAG-NEXT: v_mov_b32_e32 v32, 1 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v23 +; SDAG-NEXT: 
v_accvgpr_write_b32 a8, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v27 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v28 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v29 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v30 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_1_0_a: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: scratch_load_dword a15, off, s32 +; GISEL-NEXT: v_mov_b32_e32 v31, 1 +; GISEL-NEXT: v_mov_b32_e32 v32, 0 +; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 +; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 +; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 +; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 +; GISEL-NEXT: v_accvgpr_write_b32 a4, v20 +; GISEL-NEXT: v_accvgpr_write_b32 a5, v21 +; GISEL-NEXT: v_accvgpr_write_b32 a6, v22 +; GISEL-NEXT: v_accvgpr_write_b32 a7, v23 +; GISEL-NEXT: v_accvgpr_write_b32 a8, v24 +; GISEL-NEXT: v_accvgpr_write_b32 a9, v25 +; GISEL-NEXT: v_accvgpr_write_b32 a10, v26 +; GISEL-NEXT: v_accvgpr_write_b32 a11, v27 +; 
GISEL-NEXT: v_accvgpr_write_b32 a12, v28 +; GISEL-NEXT: v_accvgpr_write_b32 a13, v29 +; GISEL-NEXT: v_accvgpr_write_b32 a14, v30 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 +; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 +; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 +; GISEL-NEXT: v_accvgpr_read_b32 v3, a3 +; GISEL-NEXT: v_accvgpr_read_b32 v4, a4 +; GISEL-NEXT: v_accvgpr_read_b32 v5, a5 +; GISEL-NEXT: v_accvgpr_read_b32 v6, a6 +; GISEL-NEXT: v_accvgpr_read_b32 v7, a7 +; GISEL-NEXT: v_accvgpr_read_b32 v8, a8 +; GISEL-NEXT: v_accvgpr_read_b32 v9, a9 +; GISEL-NEXT: v_accvgpr_read_b32 v10, a10 +; GISEL-NEXT: v_accvgpr_read_b32 v11, a11 +; GISEL-NEXT: v_accvgpr_read_b32 v12, a12 +; GISEL-NEXT: v_accvgpr_read_b32 v13, a13 +; GISEL-NEXT: v_accvgpr_read_b32 v14, a14 +; GISEL-NEXT: v_accvgpr_read_b32 v15, a15 +; GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0) ret <16 x float> %result } diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir index 4585eca8fe894..c01c2be23b83f 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir @@ -157,19 +157,19 @@ name: V_MFMA_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__c tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, 
$vgpr32, $sgpr4 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-LABEL: name: V_MFMA_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__cbsz0_blgp0____xdl_read_overlap_vgpr_srcC - ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $vgpr33, 
$vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, killed $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed 
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, killed $vgpr32, 12, 4, implicit $mode, implicit $exec S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -180,18 +180,18 @@ name: V_MFMA_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__c tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-LABEL: name: V_MFMA_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__cbsz2_blgp2____xdl_read_overlap_vgpr_srcC - ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: 
renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, killed $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, killed $vgpr32, 12, 4, implicit $mode, implicit $exec S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -202,19 +202,19 @@ name: V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_v tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-LABEL: name: V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__cbsz0_blgp0____xdl_read_overlap_vgpr_srcC - ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: 
renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, 
$vgpr32, 12, 4, implicit $mode, implicit $exec - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -225,18 +225,18 @@ name: V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_v tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-LABEL: name: V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__cbsz2_blgp2____xdl_read_overlap_vgpr_srcC - ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: 
renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, 
implicit $mode, implicit $exec - renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $vgpr33, $vgpr32, 12, 4, implicit $mode, implicit $exec S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
@@ -247,18 +247,18 @@ name: V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64___xdl_write_ tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-LABEL: name: V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__cbsz0_blgp0____xdl_read_overlap_vgpr_srcC - ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 
$vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $vgpr33, $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 3 - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $vgpr33, killed $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec - renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $vgpr33, $vgpr21, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $vgpr33, killed $vgpr21, 12, 4, implicit $mode, implicit $exec S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... @@ -269,17 +269,17 @@ name: V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64___xdl_write_ tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-LABEL: name: V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64___xdl_write_vgpr__cbsz2_blgp2____xdl_read_overlap_vgpr_srcC - ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $vgpr33, $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_NOP 7 - ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $vgpr33, killed $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec - renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, 
implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $vgpr33, $vgpr21, 12, 4, implicit $mode, implicit $exec + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $vgpr33, killed $vgpr21, 12, 4, implicit $mode, implicit $exec S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... diff --git a/llvm/test/MC/AMDGPU/mai-gfx950-err.s b/llvm/test/MC/AMDGPU/mai-gfx950-err.s index e700b0b3cabfe..5c9dbd7f7636f 100644 --- a/llvm/test/MC/AMDGPU/mai-gfx950-err.s +++ b/llvm/test/MC/AMDGPU/mai-gfx950-err.s @@ -156,3 +156,51 @@ v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[12:19], v[4:9], v[0:3] v20, v21 blgp v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[12:19], v[4:11], v[0:3] v20, v21 blgp:4 // CHECK: :[[@LINE-1]]:53: error: wrong register tuple size for blgp value 4 + + +// Workaround a hardware bug to disallow sgpr/inline constants as scale operands + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v44, s24 +// CHECK: :[[@LINE-1]]:77: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s24, v44 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], m0, v24 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], vcc_lo, v24 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 
9, v24 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, 9 +// CHECK: :[[@LINE-1]]:77: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 33, v24 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 4.0, v24 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, 4.0 +// CHECK: :[[@LINE-1]]:77: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], -4.0, v24 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 0.15915494, v24 +// CHECK: :[[@LINE-1]]:72: error: invalid operand for instruction + +v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 16, v49 +// CHECK: :[[@LINE-1]]:73: error: invalid operand for instruction + +v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, -4.0 +// CHECK: :[[@LINE-1]]:78: error: invalid operand for instruction + +v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 4.0, v24 +// CHECK: :[[@LINE-1]]:73: error: invalid operand for instruction + +v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 0.15915494, v24 +// CHECK: :[[@LINE-1]]:73: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/mai-gfx950.s b/llvm/test/MC/AMDGPU/mai-gfx950.s index 2d3a56703674a..c9035033912ac 100644 --- a/llvm/test/MC/AMDGPU/mai-gfx950.s +++ b/llvm/test/MC/AMDGPU/mai-gfx950.s @@ -405,58 +405,6 @@ v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], a[4:11], v[12:19], v[20:23], v24, v25 // ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU 
v_mfma_scale_f32_16x16x128_f8f6f4 v[50:53], v[4:11], v[12:19], v[20:23], v24, v25 -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v44, s24 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x2c,0x31,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v44, s24 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s24, s24 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x30,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s24, s24 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s24, v44 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x58,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s24, v44 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], m0, m0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x7c,0xf8,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], m0, m0 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], vcc_lo, v2 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x6a,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], vcc_lo, v2 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 9, 
v2 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x89,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 9, v2 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v2, 9 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x02,0x13,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v2, 9 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s20, 9 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x14,0x12,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s20, 9 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 33, 9 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xa1,0x12,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 33, 9 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 4.0, v2 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf6,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 4.0, v2 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v2, 4.0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x02,0xed,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this 
GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v2, 4.0 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], -4.0, 1.0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf7,0xe4,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], -4.0, 1.0 - -// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 0.15915494, -16 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf8,0xa0,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 0.15915494, -16 - // GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24] // ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 cbsz:3 blgp:1 @@ -585,22 +533,6 @@ v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:29], v[32:47], v48, v49 // ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:2 blgp:3 -// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 16, v49 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x90,0x62,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 16, v49 - -// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, -4.0 
op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x30,0xef,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, -4.0 - -// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 4.0, 1.0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf6,0xe4,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 4.0, 1.0 - -// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 0.15915494, -16 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf8,0xa0,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU -v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 0.15915494, -16 - // op_sel combinations // GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt index 77b87ac63f335..e191455beb64d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt @@ -392,27 +392,6 @@ # GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:17], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:1 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x09,0xad,0xd3,0x04,0x19,0x52,0x64] 0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x09,0xad,0xd3,0x04,0x19,0x52,0x64 -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 33, 9 op_sel_hi:[0,0,0] ; encoding: 
[0x00,0x00,0xac,0xd3,0xa1,0x12,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0xa1,0x12,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 9, v2 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x89,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x89,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], m0, m0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x7c,0xf8,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x7c,0xf8,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s20, 9 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x14,0x12,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x14,0x12,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s24, s24 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x30,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x18,0x30,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], s24, v44 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x58,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x18,0x58,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v2, 9 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x02,0x13,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x02,0x13,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - # GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] ; encoding: 
[0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] 0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 @@ -422,15 +401,6 @@ # GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x24] 0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x24 -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x09,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x09,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v44, s24 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x2c,0x31,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x2c,0x31,0x00,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], vcc_lo, v2 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x6a,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x6a,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - # GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:7], v[12:15], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:4 blgp:4 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0c,0xad,0xd3,0x04,0x19,0x52,0x84] 0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0c,0xad,0xd3,0x04,0x19,0x52,0x84 @@ -467,18 +437,6 @@ # GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[50:53], v[4:11], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x32,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] 0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x00,0x32,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], 
v[4:11], v[12:19], v[20:23], 4.0, v2 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf6,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0xf6,0x04,0x02,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v2, 4.0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x02,0xed,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0x02,0xed,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], -4.0, 1.0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf7,0xe4,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0xf7,0xe4,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - -# GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], 0.15915494, -16 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf8,0xa0,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04] -0x00,0x00,0xac,0xd3,0xf8,0xa0,0x01,0x00,0x00,0x08,0xad,0xd3,0x04,0x19,0x52,0x04 - # GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], a[16:19], a[24:27], a[32:47], v48, v49 op_sel_hi:[0,0,0] cbsz:4 blgp:4 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x8c,0xae,0xd3,0x10,0x31,0x82,0x9c] 0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x00,0x8c,0xae,0xd3,0x10,0x31,0x82,0x9c @@ -581,18 +539,6 @@ # GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[50:65], v[16:23], v[24:31], v[32:47], v48, v49 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x32,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] 0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x00,0x32,0x08,0xae,0xd3,0x10,0x31,0x82,0x04 -# GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 16, v49 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x90,0x62,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -0x00,0x00,0xac,0xd3,0x90,0x62,0x02,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04 - -# GFX950: 
v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], v48, -4.0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0x30,0xef,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -0x00,0x00,0xac,0xd3,0x30,0xef,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04 - -# GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 4.0, 1.0 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf6,0xe4,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -0x00,0x00,0xac,0xd3,0xf6,0xe4,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04 - -# GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[32:47], 0.15915494, -16 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xac,0xd3,0xf8,0xa0,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04] -0x00,0x00,0xac,0xd3,0xf8,0xa0,0x01,0x00,0x00,0x08,0xae,0xd3,0x10,0x31,0x82,0x04 - # GFX950: v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] ; encoding: [0x00,0x80,0xb6,0xd3,0x00,0x01,0x02,0x1c] 0x00,0x80,0xb6,0xd3,0x00,0x01,0x02,0x1c From eb3b7ddc697c379894dc9b09b158697d44f7c25b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 05:12:01 +0900 Subject: [PATCH 026/734] X86: Fix win64 tail call regression for tail call to loaded pointer (#158055) Fix regression after 62f2641d603db9aef99dd5c434a1dfe7d3f56346. 
Previous patch handled the register case, but the memory case snuck another use of ptr_rc_tailcall hidden inside i64mem_TC --- llvm/lib/Target/X86/X86AsmPrinter.cpp | 6 +-- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 9 ++-- llvm/lib/Target/X86/X86FrameLowering.cpp | 2 +- llvm/lib/Target/X86/X86InstrCompiler.td | 12 +++-- llvm/lib/Target/X86/X86InstrControl.td | 3 ++ llvm/lib/Target/X86/X86InstrOperands.td | 5 ++ llvm/lib/Target/X86/X86RegisterInfo.cpp | 1 + .../X86/X86SpeculativeLoadHardening.cpp | 1 + .../test/CodeGen/X86/win64-tailcall-memory.ll | 48 +++++++++++++++++++ 9 files changed, 76 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/X86/win64-tailcall-memory.ll diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index ff22ee8c86fac..a7734e9200a19 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -478,9 +478,9 @@ static bool isIndirectBranchOrTailCall(const MachineInstr &MI) { Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 || Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri || Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNmi || - Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 || - Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TAILJMPr64_REX || - Opc == X86::TAILJMPm64_REX; + Opc == X86::TCRETURN_WINmi64 || Opc == X86::TCRETURNri64 || + Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURNri64_ImpCall || + Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX; } void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 9457e718de699..4a9b824b0db14 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -276,8 +276,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, case X86::TCRETURNdi64cc: case X86::TCRETURNri64: case X86::TCRETURNri64_ImpCall: - case X86::TCRETURNmi64: { 
- bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; + case X86::TCRETURNmi64: + case X86::TCRETURN_WINmi64: { + bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64 || + Opcode == X86::TCRETURN_WINmi64; MachineOperand &JumpTarget = MBBI->getOperand(0); MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands : 1); @@ -341,7 +343,8 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, MIB.addImm(MBBI->getOperand(2).getImm()); } - } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) { + } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64 || + Opcode == X86::TCRETURN_WINmi64) { unsigned Op = (Opcode == X86::TCRETURNmi) ? X86::TAILJMPm : (IsX64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64); diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index a293b4c87cfe4..08c9d738baceb 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2402,7 +2402,7 @@ static bool isTailCallOpcode(unsigned Opc) { Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi || Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 || - Opc == X86::TCRETURNmi64; + Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64; } void X86FrameLowering::emitEpilogue(MachineFunction &MF, diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 5a0df058b27f6..af7a33abaf758 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1364,15 +1364,19 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), // There wouldn't be enough scratch registers for base+index. 
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off), (TCRETURNmi64 addr:$dst, timm:$off)>, - Requires<[In64BitMode, NotUseIndirectThunkCalls]>; + Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls]>; + +def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off), + (TCRETURN_WINmi64 addr:$dst, timm:$off)>, + Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>, - Requires<[In64BitMode, UseIndirectThunkCalls]>; + Requires<[In64BitMode, IsNotWin64CCFunc, UseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, timm:$off)>, - Requires<[Not64BitMode, UseIndirectThunkCalls]>; + Requires<[Not64BitMode, IsNotWin64CCFunc, UseIndirectThunkCalls]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), timm:$off), (TCRETURNdi64 tglobaladdr:$dst, timm:$off)>, @@ -2215,7 +2219,7 @@ let Predicates = [HasZU] in { def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))), (SUBREG_TO_REG (i64 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>; } - + // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), (IMUL16rri GR16:$src1, imm:$src2)>; diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 139aedd473ebc..d962bfff1444d 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -372,6 +372,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, def TCRETURNmi64 : PseudoI<(outs), (ins i64mem_TC:$dst, i32imm:$offset), []>, Sched<[WriteJumpLd]>; + def TCRETURN_WINmi64 : PseudoI<(outs), + (ins i64mem_w64TC:$dst, i32imm:$offset), + []>, Sched<[WriteJumpLd]>; def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_brtarget:$dst), []>, Sched<[WriteJump]>; diff --git a/llvm/lib/Target/X86/X86InstrOperands.td b/llvm/lib/Target/X86/X86InstrOperands.td index 53a6b7c4c4c92..80843f6bb80e6 100644 --- 
a/llvm/lib/Target/X86/X86InstrOperands.td +++ b/llvm/lib/Target/X86/X86InstrOperands.td @@ -141,6 +141,11 @@ def i64mem_TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> { ptr_rc_tailcall, i32imm, SEGMENT_REG); } +def i64mem_w64TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> { + let MIOperandInfo = (ops GR64_TCW64, i8imm, + GR64_TCW64, i32imm, SEGMENT_REG); +} + // Special parser to detect 16-bit mode to select 16-bit displacement. def X86AbsMemMode16AsmOperand : AsmOperandClass { let Name = "AbsMemMode16"; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 9ec04e740a08b..7963dc1b755c9 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -1010,6 +1010,7 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg( case X86::TCRETURNri64: case X86::TCRETURNri64_ImpCall: case X86::TCRETURNmi64: + case X86::TCRETURN_WINmi64: case X86::EH_RETURN: case X86::EH_RETURN64: { LiveRegUnits LRU(*this); diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp index 4cc456ece77e0..c28de14a97874 100644 --- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -893,6 +893,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( case X86::TAILJMPm64_REX: case X86::TAILJMPm: case X86::TCRETURNmi64: + case X86::TCRETURN_WINmi64: case X86::TCRETURNmi: { // Use the generic unfold logic now that we know we're dealing with // expected instructions. 
diff --git a/llvm/test/CodeGen/X86/win64-tailcall-memory.ll b/llvm/test/CodeGen/X86/win64-tailcall-memory.ll new file mode 100644 index 0000000000000..568f4fe04fea9 --- /dev/null +++ b/llvm/test/CodeGen/X86/win64-tailcall-memory.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=x86_64-unknown-windows-gnu < %s | FileCheck %s + +; Check calling convention is correct for win64 when doing a tailcall +; for a pointer loaded from memory. + +declare void @foo(i64, ptr) + +define void @do_tailcall(ptr %objp) nounwind { +; CHECK-LABEL: do_tailcall: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rsi +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: movq %rcx, %rsi +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: callq foo +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: addq $32, %rsp +; CHECK-NEXT: popq %rsi +; CHECK-NEXT: rex64 jmpq *(%rax) # TAILCALL + tail call void @foo(i64 0, ptr null) + %fptr = load ptr, ptr %objp, align 8 + tail call void %fptr(ptr null) + ret void +} + +; Make sure aliases of ccc are also treated as win64 functions +define fastcc void @do_tailcall_fastcc(ptr %objp) nounwind { +; CHECK-LABEL: do_tailcall_fastcc: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rsi +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: movq %rcx, %rsi +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: callq foo +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: addq $32, %rsp +; CHECK-NEXT: popq %rsi +; CHECK-NEXT: rex64 jmpq *(%rax) # TAILCALL + tail call void @foo(i64 0, ptr null) + %fptr = load ptr, ptr %objp, align 8 + tail call fastcc void %fptr(ptr null) + ret void +} From ca09801bd03579f28edac60077a164fab0474eb4 Mon Sep 17 00:00:00 2001 From: Tomohiro Kashiwada Date: Fri, 12 Sep 2025 05:12:54 +0900 Subject: [PATCH 027/734] [LLVM][Coverage][Unittest] Fix dangling reference in unittest (#147118) 
In loop of `writeAndReadCoverageRegions`, `OutputFunctions[I].Filenames` references to contents of `Filenames` after returning from `readCoverageRegions` but `Filenames` will be cleared in next call of `readCoverageRegions`, causes dangling reference. The lifetime of the contents of `Filenames` must be equal or longer than `OutputFunctions[I]`, thus it has been moved into `OutputFunctions[I]` (typed `OutputFunctionCoverageData`). --- .../ProfileData/CoverageMappingTest.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp index ec81e5f274efa..b268aa7cdd057 100644 --- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp +++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp @@ -64,6 +64,7 @@ namespace { struct OutputFunctionCoverageData { StringRef Name; uint64_t Hash; + std::vector FilenamesStorage; std::vector Filenames; std::vector Regions; std::vector Expressions; @@ -71,8 +72,10 @@ struct OutputFunctionCoverageData { OutputFunctionCoverageData() : Hash(0) {} OutputFunctionCoverageData(OutputFunctionCoverageData &&OFCD) - : Name(OFCD.Name), Hash(OFCD.Hash), Filenames(std::move(OFCD.Filenames)), - Regions(std::move(OFCD.Regions)) {} + : Name(OFCD.Name), Hash(OFCD.Hash), + FilenamesStorage(std::move(OFCD.FilenamesStorage)), + Filenames(std::move(OFCD.Filenames)), Regions(std::move(OFCD.Regions)) { + } OutputFunctionCoverageData(const OutputFunctionCoverageData &) = delete; OutputFunctionCoverageData & @@ -135,7 +138,6 @@ struct InputFunctionCoverageData { struct CoverageMappingTest : ::testing::TestWithParam> { bool UseMultipleReaders; StringMap Files; - std::vector Filenames; std::vector InputFunctions; std::vector OutputFunctions; @@ -233,13 +235,11 @@ struct CoverageMappingTest : ::testing::TestWithParam> { void readCoverageRegions(const std::string &Coverage, OutputFunctionCoverageData &Data) { - // We will re-use the 
StringRef in duplicate tests, clear it to avoid - // clobber previous ones. - Filenames.clear(); - Filenames.resize(Files.size() + 1); + // +1 here since `Files` (filename to index map) uses 1-based index. + Data.FilenamesStorage.resize(Files.size() + 1); for (const auto &E : Files) - Filenames[E.getValue()] = E.getKey().str(); - ArrayRef FilenameRefs = llvm::ArrayRef(Filenames); + Data.FilenamesStorage[E.getValue()] = E.getKey().str(); + ArrayRef FilenameRefs = llvm::ArrayRef(Data.FilenamesStorage); RawCoverageMappingReader Reader(Coverage, FilenameRefs, Data.Filenames, Data.Expressions, Data.Regions); EXPECT_THAT_ERROR(Reader.read(), Succeeded()); From 3097688a4706ee232b4a3256cff2499481348f03 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 11 Sep 2025 13:21:59 -0700 Subject: [PATCH 028/734] [SimplifyCFG] Set branch weights when merging conditional store to address (#154841) --- llvm/include/llvm/IR/ProfDataUtils.h | 29 ++++++++++++++++++- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 16 ++++++++++ .../SimplifyCFG/merge-cond-stores.ll | 14 ++++++--- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index ce9f4c2de2cae..de9675f48c79b 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -16,7 +16,6 @@ #define LLVM_IR_PROFDATAUTILS_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Twine.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Compiler.h" @@ -197,5 +196,33 @@ LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I); /// Scaling the profile data attached to 'I' using the ratio of S/T. LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); +/// Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 +/// are 2 booleans that are the conditions of 2 branches for which we have the +/// branch weights B1 and B2, respectively. 
In both B1 and B2, the first +/// position (index 0) is for the 'true' branch, and the second position (index +/// 1) is for the 'false' branch. +inline SmallVector +getDisjunctionWeights(const SmallVector &B1, + const SmallVector &B2) { + // For the first conditional branch, the probability the "true" case is taken + // is p(b1) = B1[0] / (B1[0] + B1[1]). The "false" case's probability is + // p(not b1) = B1[1] / (B1[0] + B1[1]). + // Similarly for the second conditional branch and B2. + // + // The probability of the new branch NOT being taken is: + // not P = p((not b1) and (not b2)) = + // = B1[1] / (B1[0]+B1[1]) * B2[1] / (B2[0]+B2[1]) = + // = B1[1] * B2[1] / (B1[0] + B1[1]) * (B2[0] + B2[1]) + // Then the probability of it being taken is: P = 1 - (not P). + // The denominator will be the same as above, and the numerator of P will be: + // (B1[0] + B1[1]) * (B2[0] + B2[1]) - B1[1]*B2[1] + // Which then reduces to what's shown below (out of the 4 terms coming out of + // the product of sums, the subtracted one cancels out). 
+ assert(B1.size() == 2); + assert(B2.size() == 2); + auto FalseWeight = B1[1] * B2[1]; + auto TrueWeight = B1[0] * B2[0] + B1[0] * B2[1] + B1[1] * B2[0]; + return {TrueWeight, FalseWeight}; +} } // namespace llvm #endif diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 970f85378d3d2..850e57e6b0b14 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -203,6 +203,8 @@ static cl::opt MaxJumpThreadingLiveBlocks( cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in")); +extern cl::opt ProfcheckDisableMetadataFixes; + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -4438,6 +4440,20 @@ static bool mergeConditionalStoreToAddress( auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DTU); + if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) && + !ProfcheckDisableMetadataFixes) { + SmallVector PWeights, QWeights; + extractBranchWeights(*PBranch, PWeights); + extractBranchWeights(*QBranch, QWeights); + if (InvertPCond) + std::swap(PWeights[0], PWeights[1]); + if (InvertQCond) + std::swap(QWeights[0], QWeights[1]); + auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights); + setBranchWeights(PostBB->getTerminator(), CombinedWeights[0], + CombinedWeights[1], + /*IsExpected=*/false); + } QB.SetInsertPoint(T); StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); diff --git a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll index e1bd7916b3be0..b1cce4484bbab 100644 --- a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll +++ b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -passes=simplifycfg,instcombine -simplifycfg-require-and-preserve-domtree=1 < %s -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=2 -S | FileCheck %s ; This test should succeed and end up if-converted. @@ -43,7 +43,7 @@ define void @test_simple_commuted(ptr %p, i32 %a, i32 %b) { ; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 ; CHECK-NEXT: [[X3:%.*]] = icmp eq i32 [[B1:%.*]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[X2]], [[X3]] -; CHECK-NEXT: br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]] +; CHECK-NEXT: br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: 1: ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[X3]] to i32 ; CHECK-NEXT: store i32 [[SPEC_SELECT]], ptr [[P:%.*]], align 4 @@ -53,7 +53,7 @@ define void @test_simple_commuted(ptr %p, i32 %a, i32 %b) { ; entry: %x1 = icmp eq i32 %a, 0 - br i1 %x1, label %yes1, label %fallthrough + br i1 %x1, label %yes1, label %fallthrough, !prof !0 yes1: store i32 0, ptr %p @@ -61,7 +61,7 @@ yes1: fallthrough: %x2 = icmp eq i32 %b, 0 - br i1 %x2, label %yes2, label %end + br i1 %x2, label %yes2, label %end, !prof !1 yes2: store i32 1, ptr %p @@ -406,3 +406,9 @@ yes2: end: ret void } + +!0 = !{!"branch_weights", i32 7, i32 13} +!1 = !{!"branch_weights", i32 3, i32 11} +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 137, i32 143} +;. From b64ed9d79ebc4887d7452f5fa4d08cfa6640f8ab Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Thu, 11 Sep 2025 13:32:51 -0700 Subject: [PATCH 029/734] [WebKit checkers] Recognize NS_RETURNS_RETAINED and CF_RETURNS_RETAINED. (#157629) This PR adds the support for treating a function return value to be safe if the function is annotated with NS_RETURNS_RETAINED or CF_RETURNS_RETAINED. 
--- .../Checkers/WebKit/ASTUtils.cpp | 7 +++++ .../Checkers/WebKit/unretained-call-args.mm | 26 +++++++++++++++++++ .../Checkers/WebKit/unretained-local-vars.mm | 15 +++++++++++ 3 files changed, 48 insertions(+) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 9a7f5b71cae71..3fc10385885a3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -91,6 +91,13 @@ bool tryToFindPtrOrigin( continue; } if (auto *call = dyn_cast(E)) { + if (auto *Callee = call->getCalleeDecl()) { + if (Callee->hasAttr() || + Callee->hasAttr()) { + return callback(E, true); + } + } + if (auto *memberCall = dyn_cast(call)) { if (auto *decl = memberCall->getMethodDecl()) { std::optional IsGetterOfRefCt = isGetterOfSafePtr(decl); diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm index 3feecd930f109..f39822ee2a8c6 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm @@ -438,6 +438,32 @@ void use_const_local() { } // namespace const_global +namespace ns_retained_return_value { + +NSString *provideNS() NS_RETURNS_RETAINED; +CFDictionaryRef provideCF() CF_RETURNS_RETAINED; +void consumeNS(NSString *); +void consumeCF(CFDictionaryRef); + +void foo() { + consumeNS(provideNS()); + consumeCF(provideCF()); +} + +struct Base { + NSString *provideStr() NS_RETURNS_RETAINED; +}; + +struct Derived : Base { + void consumeStr(NSString *); + + void foo() { + consumeStr(provideStr()); + } +}; + +} // namespace ns_retained_return_value + @interface TestObject : NSObject - (void)doWork:(NSString *)msg, ...; - (void)doWorkOnSelf; diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm index 
10f7c9acb7a3c..0ad8f707e254c 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm @@ -408,6 +408,21 @@ void use_const_local() { } // namespace const_global +namespace ns_retained_return_value { + +NSString *provideNS() NS_RETURNS_RETAINED; +CFDictionaryRef provideCF() CF_RETURNS_RETAINED; +void consumeNS(NSString *); +void consumeCF(CFDictionaryRef); + +unsigned foo() { + auto *string = provideNS(); + auto *dictionary = provideCF(); + return string.length + CFDictionaryGetCount(dictionary); +} + +} // namespace ns_retained_return_value + bool doMoreWorkOpaque(OtherObj*); SomeObj* provide(); From 82218fb1db0b7b1ddeeb7f8c3ab494a57776764e Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 16:33:04 -0400 Subject: [PATCH 030/734] [libc++] Use the correct pull-request base and targets for the benchmarking job --- .github/workflows/libcxx-run-benchmarks.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index 98fa016a8949e..a5535a0033b8d 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -21,7 +21,9 @@ on: env: CC: clang-22 CXX: clang++-22 - COMMENT_BODY: ${{ github.event.comment.body }} + COMMENT_BODY: ${{ github.event.comment.body }} + PULL_REQUEST_HEAD: ${{ github.event.issue.pull_request.head.sha }} + PULL_REQUEST_BASE: ${{ github.event.issue.pull_request.base.sha }} jobs: run-benchmarks: @@ -33,6 +35,7 @@ jobs: steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: + ref: ${PULL_REQUEST_HEAD} fetch-depth: 0 fetch-tags: true # This job requires access to all the Git branches so it can diff against (usually) main @@ -49,13 +52,13 @@ jobs: - name: Run baseline run: | BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') - 
baseline_commit=$(git merge-base refs/remotes/origin/${GITHUB_BASE_REF} ${GITHUB_SHA}) + baseline_commit=$(git merge-base ${PULL_REQUEST_BASE} ${PULL_REQUEST_SHA}) ./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${BENCHMARKS} - name: Run candidate run: | BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') - ./libcxx/utils/test-at-commit --commit ${GITHUB_SHA} -B build/candidate -- -sv -j1 --param optimization=speed ${BENCHMARKS} + ./libcxx/utils/test-at-commit --commit ${PULL_REQUEST_SHA} -B build/candidate -- -sv -j1 --param optimization=speed ${BENCHMARKS} - name: Compare baseline and candidate runs run: ./libcxx/utils/compare-benchmarks <(./libcxx/utils/consolidate-benchmarks build/baseline) \ From 65787728b72a2b1f1bfdefd15d32ec0a69f2b941 Mon Sep 17 00:00:00 2001 From: nerix Date: Thu, 11 Sep 2025 22:35:19 +0200 Subject: [PATCH 031/734] [LLDB][NativePDB] Implement `AddSymbols` (#154121) This PR implements `SymbolFileNativePDB::AddSymbols` which adds public symbols to the symbol table. These symbols are found in the publics stream. It contains mangled names coupled with addresses. Addresses are a pair of (segment, offset). If I understood correctly, then the segment is the section ID from the COFF header. Sections are already [constructed](https://github.com/llvm/llvm-project/blob/c48ec7fb60b5e0b4100731d75f82ea63c0ec7b45/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp#L1048) using this 1-based index ([MS docs](https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#section-table-section-headers)). This allows us to use `section_list->FindSectionByID`. 
--- .../NativePDB/SymbolFileNativePDB.cpp | 39 +++++++++++- .../SymbolFile/NativePDB/disassembly.cpp | 4 +- .../SymbolFile/NativePDB/inline_sites.test | 7 +++ .../NativePDB/local-variables-registers.s | 30 ++++++++++ .../NativePDB/nested-blocks-same-address.s | 1 + .../Shell/SymbolFile/NativePDB/symtab.cpp | 59 +++++++++++++++++++ .../Unwind/windows-unaligned-x86_64.test | 3 +- 7 files changed, 138 insertions(+), 5 deletions(-) create mode 100644 lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index e99c585d7eb1f..cfecda4817976 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -1054,7 +1054,44 @@ lldb::LanguageType SymbolFileNativePDB::ParseLanguage(CompileUnit &comp_unit) { return TranslateLanguage(item->m_compile_opts->getLanguage()); } -void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {} +void SymbolFileNativePDB::AddSymbols(Symtab &symtab) { + auto *section_list = m_objfile_sp->GetSectionList(); + if (!section_list) + return; + + for (auto pid : m_index->publics().getPublicsTable()) { + PdbGlobalSymId global{pid, true}; + CVSymbol sym = m_index->ReadSymbolRecord(global); + auto kind = sym.kind(); + if (kind != S_PUB32) + continue; + PublicSym32 pub = + llvm::cantFail(SymbolDeserializer::deserializeAs(sym)); + + auto section_sp = section_list->FindSectionByID(pub.Segment); + if (!section_sp) + continue; + + lldb::SymbolType type = eSymbolTypeData; + if ((pub.Flags & PublicSymFlags::Function) != PublicSymFlags::None || + (pub.Flags & PublicSymFlags::Code) != PublicSymFlags::None) + type = eSymbolTypeCode; + + symtab.AddSymbol(Symbol(/*symID=*/pid, + /*name=*/pub.Name, + /*type=*/type, + /*external=*/true, + /*is_debug=*/true, + /*is_trampoline=*/false, + /*is_artificial=*/false, + /*section_sp=*/section_sp, + 
/*value=*/pub.Offset, + /*size=*/0, + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0)); + } +} size_t SymbolFileNativePDB::ParseFunctions(CompileUnit &comp_unit) { std::lock_guard guard(GetModuleMutex()); diff --git a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp index db3b85fa7e59f..b3f7b098a95d9 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp @@ -18,9 +18,7 @@ int main(int argc, char **argv) { // CHECK: (lldb) disassemble --flavor=intel -m -n main -// CHECK: 12 int foo() { return 42; } -// CHECK-NEXT: 13 -// CHECK-NEXT: ** 14 int main(int argc, char **argv) { +// CHECK: ** 14 int main(int argc, char **argv) { // CHECK: disassembly.cpp.tmp.exe`main: // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+0>: sub rsp, 0x38 // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+4>: mov dword ptr [rsp + 0x34], 0x0 diff --git a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test index 6293148d90ce4..769f18de51472 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test +++ b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test @@ -61,6 +61,7 @@ # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) # CHECK: LineEntry: [0x0000000140001000-0x0000000140001004): /tmp/a.cpp:2 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001046), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "argc", type = "int", valid ranges = , location = [0x0000000140001000, 0x000000014000102d) -> DW_OP_reg26 XMM9 # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX @@ -71,6 +72,7 @@ # CHECK: Blocks: id = {{.*}}, range = 
[0x140001000-0x140001046) # CHECK-NEXT: id = {{.*}}, ranges = [0x140001004-0x140001039)[0x14000103f-0x140001046), name = "Namespace1::foo", decl = a.h:4 # CHECK: LineEntry: [0x0000000140001004-0x000000014000100c): /tmp/a.h:5 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001046), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "x", type = "int", valid ranges = , location = , decl = # CHECK-NEXT: Variable: id = {{.*}}, name = "foo_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001039) -> DW_OP_breg7 RSP+44 # CHECK-NEXT: Variable: id = {{.*}}, name = "argc", type = "int", valid ranges = , location = [0x0000000140001000, 0x000000014000102d) -> DW_OP_reg26 XMM9 @@ -84,6 +86,7 @@ # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) # CHECK-NEXT: id = {{.*}}, ranges = [0x140001004-0x140001039)[0x14000103f-0x140001046), name = "Namespace1::foo", decl = a.h:4 # CHECK: LineEntry: [0x0000000140001010-0x0000000140001018): /tmp/a.h:7 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001046), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "x", type = "int", valid ranges = , location = , decl = # CHECK-NEXT: Variable: id = {{.*}}, name = "foo_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001039) -> DW_OP_breg7 RSP+44 # CHECK-NEXT: Variable: id = {{.*}}, name = "argc", type = "int", valid ranges = , location = [0x0000000140001000, 0x000000014000102d) -> DW_OP_reg26 XMM9 @@ -99,6 +102,7 @@ # CHECK-NEXT: id = {{.*}}, ranges = [0x140001004-0x140001039)[0x14000103f-0x140001046), name = "Namespace1::foo", decl = a.h:4 # CHECK-NEXT: id = {{.*}}, range = [0x14000101c-0x140001039), name = "Class1::bar", decl = b.h:4 # CHECK: LineEntry: [0x000000014000101c-0x0000000140001022): /tmp/b.h:5 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001046), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "x", type 
= "int", valid ranges = , location = [0x000000014000101c, 0x000000014000101e) -> DW_OP_reg24 XMM7 # CHECK-NEXT: Variable: id = {{.*}}, name = "bar_local", type = "int", valid ranges = , location = [0x000000014000101c, 0x0000000140001039) -> DW_OP_breg7 RSP+52 # CHECK-NEXT: Variable: id = {{.*}}, name = "x", type = "int", valid ranges = , location = , decl = @@ -118,6 +122,7 @@ # CHECK-NEXT: id = {{.*}}, range = [0x14000101c-0x140001039), name = "Class1::bar", decl = b.h:4 # CHECK-NEXT: id = {{.*}}, range = [0x14000102a-0x140001039), name = "Namespace2::Class2::func", decl = c.h:4 # CHECK: LineEntry: [0x000000014000102a-0x0000000140001031): /tmp/c.h:5 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001046), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "x", type = "int", valid ranges = , location = [0x000000014000102a, 0x0000000140001039) -> DW_OP_reg24 XMM7 # CHECK-NEXT: Variable: id = {{.*}}, name = "func_local", type = "int", valid ranges = , location = [0x000000014000102a, 0x0000000140001039) -> DW_OP_breg7 RSP+48 # CHECK-NEXT: Variable: id = {{.*}}, name = "bar_local", type = "int", valid ranges = , location = [0x000000014000101c, 0x0000000140001039) -> DW_OP_breg7 RSP+52 @@ -132,6 +137,7 @@ # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) # CHECK: LineEntry: [0x0000000140001039-0x000000014000103d): /tmp/a.cpp:3 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001046), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 @@ -142,6 +148,7 @@ # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) # 
CHECK-NEXT: id = {{.*}}, ranges = [0x140001004-0x140001039)[0x14000103f-0x140001046), name = "Namespace1::foo", decl = a.h:4 # CHECK: LineEntry: [0x0000000140001044-0x0000000140001046): /tmp/a.h:8 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001046), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "x", type = "int", valid ranges = , location = , decl = # CHECK-NEXT: Variable: id = {{.*}}, name = "foo_local", type = "int", valid ranges = , location = [0x0000000140001044, 0x0000000140001046) -> DW_OP_breg7 RSP+44 # CHECK-NEXT: Variable: id = {{.*}}, name = "argc", type = "int", valid ranges = , location = [0x0000000140001044, 0x0000000140001045) -> DW_OP_reg26 XMM9 diff --git a/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s b/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s index 85d92a2447939..fe2f397d60c01 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s +++ b/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s @@ -34,38 +34,46 @@ # CHECK: (lldb) image lookup -a 0x140001000 -v # CHECK: LineEntry: [0x0000000140001000-0x0000000140001003): C:\src\test\a.cpp:10 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001011), name="struct S CreateS(int, char)", mangled="?CreateS@@YA?AUS@@HD@Z" # CHECK-NEXT: Variable: id = {{.*}}, name = "p1", type = "int", valid ranges = , location = [0x0000000140001000, 0x0000000140001003) -> DW_OP_reg26 XMM9 # CHECK-NEXT: Variable: id = {{.*}}, name = "p2", type = "char", valid ranges = , location = [0x0000000140001000, 0x0000000140001006) -> DW_OP_regx 0x3f # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001003 -v # CHECK: LineEntry: [0x0000000140001003-0x0000000140001006): C:\src\test\a.cpp:11 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001011), name="struct S CreateS(int, char)", mangled="?CreateS@@YA?AUS@@HD@Z" # CHECK-NEXT: Variable: id = {{.*}}, name = "p2", 
type = "char", valid ranges = , location = [0x0000000140001000, 0x0000000140001006) -> DW_OP_regx 0x3f # CHECK-NEXT: Variable: id = {{.*}}, name = "s", type = "S", valid ranges = , location = [0x0000000140001003, 0x0000000140001006) -> DW_OP_piece 0x4, DW_OP_regx 0x3f, DW_OP_piece 0x1, DW_OP_piece 0x3 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001006 -v # CHECK: LineEntry: [0x0000000140001006-0x0000000140001011): C:\src\test\a.cpp:12 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001000-0x0000000140001011), name="struct S CreateS(int, char)", mangled="?CreateS@@YA?AUS@@HD@Z" # CHECK-NEXT: Variable: id = {{.*}}, name = "s", type = "S", valid ranges = , location = [0x0000000140001006, 0x0000000140001011) -> DW_OP_reg26 XMM9, DW_OP_piece 0x4, DW_OP_regx 0x3f, DW_OP_piece 0x1, DW_OP_piece 0x3 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001011 -v # CHECK: LineEntry: [0x0000000140001011-0x0000000140001015): C:\src\test\a.cpp:15 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "argc", type = "int", valid ranges = , location = [0x0000000140001011, 0x0000000140001017) -> DW_OP_reg26 XMM9 # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001011, 0x0000000140001019) -> DW_OP_reg3 RBX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001017 -v # CHECK: LineEntry: [0x0000000140001017-0x000000014000101e): C:\src\test\a.cpp:17 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001011, 0x0000000140001019) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "local", type = "int", valid ranges = , location = [0x0000000140001017, 0x000000014000101e) -> DW_OP_reg26 XMM9 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001019 -v # CHECK: 
LineEntry: [0x0000000140001017-0x000000014000101e): C:\src\test\a.cpp:17 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "local", type = "int", valid ranges = , location = [0x0000000140001017, 0x000000014000101e) -> DW_OP_reg26 XMM9 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000101e -v # CHECK: LineEntry: [0x000000014000101e-0x0000000140001031): C:\src\test\a.cpp:18 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "s", type = "S", valid ranges = , location = [0x000000014000101e, 0x000000014000102c) -> DW_OP_reg24 XMM7, DW_OP_piece 0x4, DW_OP_piece 0x4 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000102c -v # CHECK: LineEntry: [0x000000014000101e-0x0000000140001031): C:\src\test\a.cpp:18 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" .text .def @feat.00; @@ -406,14 +414,17 @@ main: # @main .short .Ltmp103-.Ltmp102 # CHECK: (lldb) image lookup -a 0x140001031 -v # CHECK: LineEntry: [0x0000000140001031-0x0000000140001034): C:\src\test\a.cpp:1000 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "non_overlapped_ranges", type = "S1", valid ranges = , location = [0x0000000140001031, 0x0000000140001032) -> DW_OP_reg3 RBX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001032 -v # CHECK: LineEntry: [0x0000000140001031-0x0000000140001034): C:\src\test\a.cpp:1000 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "non_overlapped_ranges", type = "S1", valid ranges = , location = [0x0000000140001032, 0x0000000140001033) -> DW_OP_reg2 RCX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001033 -v # CHECK: LineEntry: 
[0x0000000140001031-0x0000000140001034): C:\src\test\a.cpp:1000 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "non_overlapped_ranges", type = "S1", valid ranges = , location = [0x0000000140001033, 0x0000000140001034) -> DW_OP_reg8 R8 # CHECK-EMPTY: @@ -431,18 +442,22 @@ main: # @main .short .Ltmp105-.Ltmp104 # CHECK: (lldb) image lookup -a 0x140001034 -v # CHECK: LineEntry: [0x0000000140001034-0x000000014000103b): C:\src\test\a.cpp:1001 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_subfield_ranges", type = "S1", valid ranges = , location = [0x0000000140001034, 0x0000000140001035) -> DW_OP_regx 0x3f, DW_OP_piece 0x1, DW_OP_piece 0x7 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001035 -v # CHECK: LineEntry: [0x0000000140001034-0x000000014000103b): C:\src\test\a.cpp:1001 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_subfield_ranges", type = "S1", valid ranges = , location = [0x0000000140001035, 0x0000000140001036) -> DW_OP_regx 0x3f, DW_OP_piece 0x1, DW_OP_piece 0x3, DW_OP_reg24 XMM7, DW_OP_piece 0x4 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001036 -v # CHECK: LineEntry: [0x0000000140001034-0x000000014000103b): C:\src\test\a.cpp:1001 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_subfield_ranges", type = "S1", valid ranges = , location = [0x0000000140001036, 0x0000000140001037) -> DW_OP_piece 0x4, DW_OP_reg24 XMM7, DW_OP_piece 0x4 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001037 -v # CHECK: LineEntry: [0x0000000140001034-0x000000014000103b): C:\src\test\a.cpp:1001 +# CHECK-NEXT: Symbol: id = {{.*}}, range = 
[0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_subfield_ranges", type = "S1", valid ranges = , location = [0x0000000140001037, 0x0000000140001039) -> DW_OP_piece 0x4, DW_OP_reg26 XMM9, DW_OP_piece 0x4 # CHECK-EMPTY: @@ -461,22 +476,27 @@ main: # @main .short .Ltmp107-.Ltmp106 # CHECK: (lldb) image lookup -a 0x14000103b -v # CHECK: LineEntry: [0x000000014000103b-0x0000000140001045): C:\src\test\a.cpp:1002 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_2", type = "S1", valid ranges = , location = [0x000000014000103b, 0x000000014000103c) -> DW_OP_reg3 RBX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000103d -v # CHECK: LineEntry: [0x000000014000103b-0x0000000140001045): C:\src\test\a.cpp:1002 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_2", type = "S1", valid ranges = , location = [0x000000014000103c, 0x000000014000103e) -> DW_OP_reg2 RCX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000103f -v # CHECK: LineEntry: [0x000000014000103b-0x0000000140001045): C:\src\test\a.cpp:1002 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_2", type = "S1", valid ranges = , location = [0x000000014000103f, 0x0000000140001041) -> DW_OP_reg11 R11 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001041 -v # CHECK: LineEntry: [0x000000014000103b-0x0000000140001045): C:\src\test\a.cpp:1002 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_2", type = "S1", valid ranges = , location = [0x0000000140001041, 0x0000000140001043) -> DW_OP_reg0 RAX # CHECK-EMPTY: # CHECK: (lldb) 
image lookup -a 0x140001043 -v # CHECK: LineEntry: [0x000000014000103b-0x0000000140001045): C:\src\test\a.cpp:1002 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_2", type = "S1", valid ranges = , location = [0x0000000140001043, 0x0000000140001044) -> DW_OP_reg11 R11 # CHECK-EMPTY: @@ -505,33 +525,41 @@ main: # @main .short .Ltmp109-.Ltmp108 # CHECK: (lldb) image lookup -a 0x140001045 -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_3", type = "S1", valid ranges = , location = [0x0000000140001045, 0x0000000140001046) -> DW_OP_regx 0x3f, DW_OP_piece 0x1, DW_OP_piece 0x7 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001046 -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_3", type = "S1", valid ranges = , location = [0x0000000140001046, 0x0000000140001047) -> DW_OP_regx 0x3f, DW_OP_piece 0x1, DW_OP_piece 0x3, DW_OP_reg24 XMM7, DW_OP_piece 0x4 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001047 -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_3", type = "S1", valid ranges = , location = [0x0000000140001047, 0x0000000140001048) -> DW_OP_reg3 RBX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001048 -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = 
[0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_3", type = "S1", valid ranges = , location = [0x0000000140001048, 0x0000000140001049) -> DW_OP_regx 0x3f, DW_OP_piece 0x1, DW_OP_piece 0x3, DW_OP_reg24 XMM7, DW_OP_piece 0x4 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x140001049 -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_3", type = "S1", valid ranges = , location = [0x0000000140001049, 0x000000014000104a) -> DW_OP_reg0 RAX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000104a -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000104b -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_3", type = "S1", valid ranges = , location = [0x000000014000104b, 0x000000014000104e) -> DW_OP_reg2 RCX # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000104c -v # CHECK: LineEntry: [0x0000000140001045-0x000000014000104e): C:\src\test\a.cpp:1003 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "overlapped_ranges_3", type = "S1", valid ranges = , location = [0x000000014000104b, 0x000000014000104e) -> DW_OP_reg2 RCX # CHECK-EMPTY: @@ -549,10 +577,12 @@ main: # @main .short 4431 # Record kind: S_PROC_ID_END # CHECK: (lldb) image lookup -a 0x14000104e -v # CHECK: LineEntry: [0x000000014000104e-0x0000000140001050): C:\src\test\a.cpp:1004 +# 
CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "simple_type1", type = "int64_t", valid ranges = , location = [0x000000014000104e, 0x000000014000104f) -> DW_OP_reg26 XMM9, DW_OP_piece 0x4, DW_OP_reg24 XMM7, DW_OP_piece 0x4 # CHECK-EMPTY: # CHECK: (lldb) image lookup -a 0x14000104f -v # CHECK: LineEntry: [0x000000014000104e-0x0000000140001050): C:\src\test\a.cpp:1004 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "simple_type1", type = "int64_t", valid ranges = , location = [0x000000014000104f, 0x0000000140001050) -> DW_OP_reg26 XMM9, DW_OP_piece 0x4, DW_OP_piece 0x4 # CHECK-EMPTY: diff --git a/lldb/test/Shell/SymbolFile/NativePDB/nested-blocks-same-address.s b/lldb/test/Shell/SymbolFile/NativePDB/nested-blocks-same-address.s index dc3ee844fe364..e51b280d4213e 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/nested-blocks-same-address.s +++ b/lldb/test/Shell/SymbolFile/NativePDB/nested-blocks-same-address.s @@ -30,6 +30,7 @@ # CHECK-NEXT: id = {{.*}}, range = [0x140001025-0x140001046) # CHECK-NEXT: id = {{.*}}, range = [0x140001025-0x140001046) # CHECK-NEXT: LineEntry: [0x0000000140001035-0x0000000140001046): /tmp/test.cpp:10 +# CHECK-NEXT: Symbol: id = {{.*}}, range = [0x0000000140001020-0x000000014000104d), name="main" # CHECK-NEXT: Variable: id = {{.*}}, name = "path", type = "volatile char[10]", valid ranges = , location = [0x0000000140001025, 0x0000000140001046) -> DW_OP_breg7 RSP+40, decl = # CHECK-NEXT: Variable: id = {{.*}}, name = "kMfDLL", type = "const char *", valid ranges = , location = [0x000000014000103c, 0x0000000140001046) -> DW_OP_reg2 RCX, decl = # CHECK-NEXT: Variable: id = {{.*}}, name = "__range1", type = "const char *const (&)[1]", valid ranges = , location = , decl = diff --git a/lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp 
b/lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp new file mode 100644 index 0000000000000..81d643d9572d8 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp @@ -0,0 +1,59 @@ +// REQUIRES: x86 + +// Test symtab reading +// RUN: %build --compiler=clang-cl --arch=64 --nodefaultlib -o %t.exe -- %s +// RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symtab %t.exe --find-symbols-by-regex=".*" | FileCheck %s +// RUN: env LLDB_USE_NATIVE_PDB_READER=0 lldb-test symtab %t.exe --find-symbols-by-regex=".*" | FileCheck %s + +struct A { + void something() {} +}; + +namespace ns { +template struct B { + struct C { + static int static_fn() { return 1; } + }; + + int b_func() const { return 3; } +}; + +struct Dyn { + virtual ~Dyn() = default; +}; + +int a_function() { return 1; } +} // namespace ns + +void *operator new(unsigned long long n) { return nullptr; } +void operator delete(void *p, unsigned long long i) {} + +A global_a; +ns::B::C global_c; +int global_int; + +int main(int argc, char **argv) { + A a; + a.something(); + ns::B::C::static_fn(); + ns::B::C::static_fn(); + ns::B b; + ns::Dyn dyn; + return ns::a_function() + b.b_func(); +} + +// CHECK-DAG: Code {{.*}} main +// CHECK-DAG: Code {{.*}} ?b_func@?$B@F@ns@@QEBAHXZ +// CHECK-DAG: Code {{.*}} ?something@A@@QEAAXXZ +// CHECK-DAG: Code {{.*}} ??_GDyn@ns@@UEAAPEAXI@Z +// CHECK-DAG: Code {{.*}} ??2@YAPEAX_K@Z +// CHECK-DAG: Code {{.*}} ??3@YAXPEAX_K@Z +// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@H@ns@@SAHXZ +// CHECK-DAG: Code {{.*}} ?a_function@ns@@YAHXZ +// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@_N@ns@@SAHXZ +// CHECK-DAG: Code {{.*}} ??1Dyn@ns@@UEAA@XZ +// CHECK-DAG: Code {{.*}} ??0Dyn@ns@@QEAA@XZ +// CHECK-DAG: Data {{.*}} ?global_int@@3HA +// CHECK-DAG: Data {{.*}} ??_7Dyn@ns@@6B@ +// CHECK-DAG: Data {{.*}} ?global_a@@3UA@@A +// CHECK-DAG: Data {{.*}} ?global_c@@3UC@?$B@_J@ns@@A diff --git a/lldb/test/Shell/Unwind/windows-unaligned-x86_64.test b/lldb/test/Shell/Unwind/windows-unaligned-x86_64.test 
index 0356960424328..9f0a97527de4e 100644 --- a/lldb/test/Shell/Unwind/windows-unaligned-x86_64.test +++ b/lldb/test/Shell/Unwind/windows-unaligned-x86_64.test @@ -5,7 +5,8 @@ # REQUIRES: target-x86_64, native, system-windows # RUN: %build %p/Inputs/windows-unaligned-x86_64.cpp %p/Inputs/windows-unaligned-x86_64-asm.s -o %t -# RUN: %lldb %t -s %s -o exit | FileCheck %s +# RUN: env LLDB_USE_NATIVE_PDB_READER=0 %lldb %t -s %s -o exit | FileCheck %s +# RUN: env LLDB_USE_NATIVE_PDB_READER=1 %lldb %t -s %s -o exit | FileCheck %s # Future TODO: If %build could compile the source file in C mode, the symbol # name handling would be easier across msvc and mingw build configurations. From 43561ad204329667954d5ce1e632e09335f0ab12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Spaits?= Date: Thu, 11 Sep 2025 22:52:16 +0200 Subject: [PATCH 032/734] [BasicBlockUtils] Handle funclets when detaching EH pad blocks (#157363) Fixes #148052 . When removing EH Pad blocks, the value defined by them becomes poison. These poison values are then used by `catchret` and `cleanupret`, which is invalid. This commit replaces those unreachable `catchret` and `cleanupret` instructions with `unreachable`. 
--- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 44 ++++- .../unreachable-multi-basic-block-funclet.ll | 169 ++++++++++++++++++ 2 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index cad0b4c12b54e..d2391e166f942 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -58,6 +58,19 @@ static cl::opt MaxDeoptOrUnreachableSuccessorCheckDepth( "is followed by a block that either has a terminating " "deoptimizing call or is terminated with an unreachable")); +static void replaceFuncletPadsRetWithUnreachable(Instruction &I) { + assert(isa(I) && "Instruction must be a funclet pad!"); + for (User *User : make_early_inc_range(I.users())) { + Instruction *ReturnInstr = dyn_cast(User); + if (isa(ReturnInstr) || + isa(ReturnInstr)) { + BasicBlock *ReturnInstrBB = ReturnInstr->getParent(); + ReturnInstr->eraseFromParent(); + new UnreachableInst(ReturnInstrBB->getContext(), ReturnInstrBB); + } + } +} + void llvm::detachDeadBlocks( ArrayRef BBs, SmallVectorImpl *Updates, @@ -75,7 +88,36 @@ void llvm::detachDeadBlocks( // Zap all the instructions in the block. while (!BB->empty()) { Instruction &I = BB->back(); - // If this instruction is used, replace uses with an arbitrary value. + // Exception handling funclets need to be explicitly addressed. + // These funclets must begin with cleanuppad or catchpad and end with + // cleanupret or catchret. The return instructions can be in different + // basic blocks than the pad instruction. If we would only delete the + // first block, then we would have possible cleanupret and catchret + // instructions with poison arguments, which wouldn't be valid.
+ if (isa(I)) + replaceFuncletPadsRetWithUnreachable(I); + + // Catchswitch instructions have handlers, which must be catchpads and + // an unwind label, which is either a catchpad or catchswitch. + if (CatchSwitchInst *CSI = dyn_cast(&I)) { + // Iterating over the handlers and the unwind basic block and processing + // catchpads. If the unwind label is a catchswitch, we just replace the + // label with poison later on. + for (unsigned I = 0; I < CSI->getNumSuccessors(); I++) { + BasicBlock *SucBlock = CSI->getSuccessor(I); + Instruction &SucFstInst = *(SucBlock->getFirstNonPHIIt()); + if (isa(SucFstInst)) { + replaceFuncletPadsRetWithUnreachable(SucFstInst); + // There may be catchswitch instructions using the catchpad. + // Just replace those with poison. + if (!SucFstInst.use_empty()) + SucFstInst.replaceAllUsesWith( + PoisonValue::get(SucFstInst.getType())); + SucFstInst.eraseFromParent(); + } + } + } + // Because control flow can't get here, we don't care what we replace the // value with.
Note that since this block is unreachable, and all values // contained within it must dominate their uses, that all uses will diff --git a/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll b/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll new file mode 100644 index 0000000000000..d2fccae6770db --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll @@ -0,0 +1,169 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s + +; cleanuppad/cleanupret + +define void @unreachable_cleanuppad_linear(i64 %shapes.1) personality ptr null { +; CHECK-LABEL: define void @unreachable_cleanuppad_linear( +; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] +; CHECK-NEXT: ret void +; +start: + %_7 = icmp ult i64 0, %shapes.1 + ret void + +funclet: + %cleanuppad = cleanuppad within none [] + br label %funclet_end + +funclet_end: + cleanupret from %cleanuppad unwind to caller +} + +define void @unreachable_cleanuppad_multiple_predecessors(i64 %shapes.1) personality ptr null { +; CHECK-LABEL: define void @unreachable_cleanuppad_multiple_predecessors( +; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] +; CHECK-NEXT: ret void +; +start: + %_7 = icmp ult i64 0, %shapes.1 + ret void + +funclet: + %cleanuppad = cleanuppad within none [] + switch i64 %shapes.1, label %otherwise [ i64 0, label %one + i64 1, label %two + i64 42, label %three ] +one: + br label %funclet_end + +two: + br label %funclet_end + +three: + br label %funclet_end + +otherwise: + br label %funclet_end + +funclet_end: + cleanupret from %cleanuppad unwind to caller +} + +; catchpad/catchret + +define void @unreachable_catchpad_linear(i64 %shapes.1) 
personality ptr null { +; CHECK-LABEL: define void @unreachable_catchpad_linear( +; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] +; CHECK-NEXT: ret void +; +start: + %_7 = icmp ult i64 0, %shapes.1 + ret void + +dispatch: + %cs = catchswitch within none [label %funclet] unwind to caller + +funclet: + %cleanuppad = catchpad within %cs [] + br label %funclet_end + + +funclet_end: + catchret from %cleanuppad to label %unreachable + +unreachable: + unreachable +} + +define void @unreachable_catchpad_multiple_predecessors(i64 %shapes.1) personality ptr null { +; CHECK-LABEL: define void @unreachable_catchpad_multiple_predecessors( +; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] +; CHECK-NEXT: ret void +; +start: + %_7 = icmp ult i64 0, %shapes.1 + ret void + +dispatch: + %cs = catchswitch within none [label %funclet] unwind to caller + +funclet: + %cleanuppad = catchpad within %cs [] + switch i64 %shapes.1, label %otherwise [ i64 0, label %one + i64 1, label %two + i64 42, label %three ] +one: + br label %funclet_end + +two: + br label %funclet_end + +three: + br label %funclet_end + +otherwise: + br label %funclet_end + +funclet_end: + catchret from %cleanuppad to label %unreachable + +unreachable: + unreachable +} + +; Issue reproducer + +define void @gh148052(i64 %shapes.1) personality ptr null { +; CHECK-LABEL: define void @gh148052( +; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] +; CHECK-NEXT: call void @llvm.assume(i1 [[_7]]) +; CHECK-NEXT: ret void +; +start: + %_7 = icmp ult i64 0, %shapes.1 + br i1 %_7, label %bb1, label %panic + +bb1: + %_11 = icmp ult i64 0, %shapes.1 + br i1 %_11, label %bb3, label %panic1 + +panic: + unreachable + +bb3: + ret void + +panic1: + invoke 
void @func(i64 0, i64 0, ptr null) + to label %unreachable unwind label %funclet_bb14 + +funclet_bb14: + %cleanuppad = cleanuppad within none [] + br label %bb13 + +unreachable: + unreachable + +bb10: + cleanupret from %cleanuppad5 unwind to caller + +funclet_bb10: + %cleanuppad5 = cleanuppad within none [] + br label %bb10 + +bb13: + cleanupret from %cleanuppad unwind label %funclet_bb10 +} + +declare void @func(i64, i64, ptr) From e0817c642713b316e878a729236fa3ff99858cba Mon Sep 17 00:00:00 2001 From: jtstogel Date: Thu, 11 Sep 2025 14:15:01 -0700 Subject: [PATCH 033/734] [bazel] Redo LSP changes to fix bazel build (#158150) Reverts llvm/llvm-project#157691 Change was relanded in https://github.com/llvm/llvm-project/pull/157885 --- .../bazel/llvm-project-overlay/llvm/BUILD.bazel | 1 + .../llvm/unittests/BUILD.bazel | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index b042c183df9fb..e6f10b08932e5 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -228,6 +228,7 @@ cc_library( "lib/Support/*.cpp", "lib/Support/*.h", "lib/Support/*.inc", + "lib/Support/LSP/*.cpp", # To avoid a dependency cycle. 
"include/llvm/Option/*.h", ]) + select({ diff --git a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel index 3fa62bf708514..628a720a2bbb1 100644 --- a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel @@ -793,6 +793,23 @@ cc_test( ], ) +cc_test( + name = "SupportLSPTests", + size = "small", + srcs = glob(["Support/LSP/*.cpp"]), + copts = [ + "$(STACK_FRAME_UNLIMITED)", + ], + linkstatic = 1, + deps = [ + "//llvm:Support", + "//llvm:config", + "//third-party/unittest:gmock", + "//third-party/unittest:gtest", + "//third-party/unittest:gtest_main", + ], +) + cc_test( name = "tablegen_tests", size = "small", From 20e55f359dc424fa23392ad7ecd766294b60accc Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 11 Sep 2025 14:18:17 -0700 Subject: [PATCH 034/734] [lldb][NFC] Mark API test skipIfRemote to avoid a bot The lldb-remote-linux-ubuntu bot (and only this bot) is still failing for TestCortexMExceptionUnwind.py because the Target triple is somehow inheriting a non-Darwin OS. I marked this API test skipUnlessDarwin but this bot can be identified more specifically by a skipIfRemote test. There's no benefit to running this test remotely anyway; it doesn't execute any code. --- .../unwind/cortex-m-exception/TestCortexMExceptionUnwind.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py index 30b2a525eaab1..768dd6fe6867c 100644 --- a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py +++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py @@ -26,10 +26,7 @@ class TestCortexMExceptionUnwind(TestBase): # the frame pointer, and we can walk the stack. 
# ABISysV_arm::CreateDefaultUnwindPlan will only get one frame and # not be able to continue. - # - # This may only be occuring on a 32-bit Ubuntu bot; need to test - # 64-bit Ubuntu and confirm. - @skipUnlessDarwin + @skipIfRemote def test_no_fpu(self): """Test that we can backtrace correctly through an ARM Cortex-M Exception return stack""" From e87dc2ebf91063ee85394e67db989ad1f1bd55b5 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 11 Sep 2025 14:26:13 -0700 Subject: [PATCH 035/734] [llvm-debuginfod] Update tests to work with internal shell (#158141) This patch updates the llvm-debuginfod tests to work with the lit internal shell. One test was missing env before environment variables and another was using a brace expansion. --- llvm/test/tools/llvm-debuginfod-find/cache.test | 5 +++-- llvm/test/tools/llvm-debuginfod/llvm-debuginfod.test | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/test/tools/llvm-debuginfod-find/cache.test b/llvm/test/tools/llvm-debuginfod-find/cache.test index eae341f3eb299..66af974a2596d 100644 --- a/llvm/test/tools/llvm-debuginfod-find/cache.test +++ b/llvm/test/tools/llvm-debuginfod-find/cache.test @@ -2,9 +2,10 @@ REQUIRES: curl UNSUPPORTED: system-windows RUN: rm -rf %t/* -RUN: mkdir -p %t/buildid/012345678901234{5,6} +RUN: mkdir -p %t/buildid/0123456789012345 +RUN: mkdir -p %t/buildid/0123456789012346 RUN: echo 'f' > %t/buildid/0123456789012345/debuginfo -RUN: cp %t/buildid/012345678901234{5,6}/debuginfo +RUN: cp %t/buildid/0123456789012345/debuginfo %t/buildid/0123456789012346/debuginfo RUN: mkdir %t/cache RUN: env DEBUGINFOD_CACHE_PATH=%t/cache DEBUGINFOD_URLS=file://%t \ RUN: llvm-debuginfod-find --debuginfo 0123456789012345 > /dev/null diff --git a/llvm/test/tools/llvm-debuginfod/llvm-debuginfod.test b/llvm/test/tools/llvm-debuginfod/llvm-debuginfod.test index edeae375a5079..c32c7b75e79c1 100644 --- a/llvm/test/tools/llvm-debuginfod/llvm-debuginfod.test +++ 
b/llvm/test/tools/llvm-debuginfod/llvm-debuginfod.test @@ -13,20 +13,20 @@ # RUN: rm -rf %t # RUN: mkdir %t # # Query the debuginfod server for artifacts -# RUN: DEBUGINFOD_CACHE_PATH=%t %python %s --server-cmd 'llvm-debuginfod -v -c 3 %S/Inputs' \ +# RUN: env DEBUGINFOD_CACHE_PATH=%t %python %s --server-cmd 'llvm-debuginfod -v -c 3 %S/Inputs' \ # RUN: --tool-cmd 'llvm-debuginfod-find --dump --executable 2c39b7557c50162aaeb5a3148c9f76e6e46012e3' | \ # RUN: diff - %S/Inputs/main.exe -# RUN: DEBUGINFOD_CACHE_PATH=%t %python %s --server-cmd 'llvm-debuginfod -v -c 3 %S/Inputs' \ +# RUN: env DEBUGINFOD_CACHE_PATH=%t %python %s --server-cmd 'llvm-debuginfod -v -c 3 %S/Inputs' \ # RUN: --tool-cmd 'llvm-debuginfod-find --dump --debuginfo 2c39b7557c50162aaeb5a3148c9f76e6e46012e3' | \ # RUN: diff - %S/Inputs/main-debug.exe # Debuginfod server does not yet support source files # # The artifacts should still be present in the cache without needing to query # # the server. -# RUN: DEBUGINFOD_CACHE_PATH=%t llvm-debuginfod-find --dump \ +# RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-debuginfod-find --dump \ # RUN: --executable 2c39b7557c50162aaeb5a3148c9f76e6e46012e3 | \ # RUN: diff - %S/Inputs/main.exe -# RUN: DEBUGINFOD_CACHE_PATH=%t llvm-debuginfod-find --dump \ +# RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-debuginfod-find --dump \ # RUN: --debuginfo 2c39b7557c50162aaeb5a3148c9f76e6e46012e3 | \ # RUN: diff - %S/Inputs/main-debug.exe From 73b24d27d8ab7a252fca4c55c1fe7848cab2029c Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 11 Sep 2025 21:27:27 +0000 Subject: [PATCH 036/734] Reapply "[llvm] Use lit internal shell by default" This reverts commit 5125f476b2f90ccf157c78d73bc6fe14c4413a27. This was reverted because it broke some debuginfod tests. Those have been fixed now in #158141. Relanding now and hoping it sticks this time. 
--- llvm/test/lit.cfg.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 867a44be56727..e8861e29be707 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -17,6 +17,17 @@ # name: The name of this test suite. config.name = "LLVM" +# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. +# See https://github.com/llvm/llvm-project/issues/106636 for more details. +# +# We prefer the lit internal shell which provides a better user experience on failures +# and is faster unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 +# env var. +use_lit_shell = True +lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL") +if lit_shell_env: + use_lit_shell = lit.util.pythonize_bool(lit_shell_env) + # testFormat: The test format to use to interpret tests. extra_substitutions = extra_substitutions = ( [ @@ -26,9 +37,7 @@ if config.enable_profcheck else [] ) -config.test_format = lit.formats.ShTest( - not llvm_config.use_lit_shell, extra_substitutions -) +config.test_format = lit.formats.ShTest(not use_lit_shell, extra_substitutions) # suffixes: A list of file extensions to treat as test files. This is overriden # by individual lit.local.cfg files in the test subdirectories. From 2f9a458f35ccd0fc45067afda346fd59052d0c0c Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Thu, 11 Sep 2025 14:49:59 -0700 Subject: [PATCH 037/734] [WebKit checkers] Treat asm brk as trivial (#155046) Like other functions which results in abort, treat asm brk instruction as trivial. 
--- .../Checkers/WebKit/PtrTypesSemantics.cpp | 4 ++++ .../WebKit/trivial-code-check-asm-brk.cpp | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 clang/test/Analysis/Checkers/WebKit/trivial-code-check-asm-brk.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 884dbe90e7b12..56747d72136e3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -666,6 +666,10 @@ class TrivialFunctionAnalysisVisitor return IsFunctionTrivial(Callee); } + bool VisitGCCAsmStmt(const GCCAsmStmt *AS) { + return AS->getAsmString() == "brk #0xc471"; + } + bool VisitSubstNonTypeTemplateParmExpr(const SubstNonTypeTemplateParmExpr *E) { // Non-type template paramter is compile time constant and trivial. diff --git a/clang/test/Analysis/Checkers/WebKit/trivial-code-check-asm-brk.cpp b/clang/test/Analysis/Checkers/WebKit/trivial-code-check-asm-brk.cpp new file mode 100644 index 0000000000000..de98c77eb7347 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/trivial-code-check-asm-brk.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_analyze_cc1 -triple arm-darwin -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s +// expected-no-diagnostics + +void crash() +{ + __asm__ volatile ("brk #0xc471"); + __builtin_unreachable(); +} + +class SomeObj { +public: + void ref(); + void deref(); + + void someWork() { crash(); } +}; + +SomeObj* provide(); + +void doSomeWork() { + provide()->someWork(); +} From 6272540b9f7e5fbd3dab8aaca103012a4dc1faa3 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 17:27:24 -0400 Subject: [PATCH 038/734] [libc++] Properly extract the PR head and base from the Github event --- .github/workflows/libcxx-run-benchmarks.yml | 47 ++++++++++++++------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git 
a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index a5535a0033b8d..764686b0b4e09 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -21,9 +21,6 @@ on: env: CC: clang-22 CXX: clang++-22 - COMMENT_BODY: ${{ github.event.comment.body }} - PULL_REQUEST_HEAD: ${{ github.event.issue.pull_request.head.sha }} - PULL_REQUEST_BASE: ${{ github.event.issue.pull_request.base.sha }} jobs: run-benchmarks: @@ -33,12 +30,6 @@ jobs: runs-on: llvm-premerge-libcxx-next-runners # TODO: This should run on a dedicated set of machines steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - ref: ${PULL_REQUEST_HEAD} - fetch-depth: 0 - fetch-tags: true # This job requires access to all the Git branches so it can diff against (usually) main - - uses: actions/setup-python@v6 with: python-version: '3.10' @@ -48,18 +39,42 @@ jobs: python3 -m venv .venv source .venv/bin/activate python -m pip install -r libcxx/utils/requirements.txt + python -m pip install pygithub + + - name: Extract information from the PR + id: vars + run: | + source .venv/bin/activate + cat <> ${GITHUB_OUTPUT} + import github + repo = github.Github("${{ github.token }}").get_repo("${{ github.repository }}") + pr = repo.get_pull(${{ github.event.issue.number }}) + print(f"pr_base={pr.base.sha}") + print(f"pr_head={pr.head.sha}") + EOF + BENCHMARKS=$(echo "${{ github.event.comment.body }}" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') + echo "benchmarks=${BENCHMARKS}" >> ${GITHUB_OUTPUT} + + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + ref: ${{ steps.vars.outputs.pr_head }} + fetch-depth: 0 + fetch-tags: true # This job requires access to all the Git branches so it can diff against (usually) main + path: repo # Avoid nuking the workspace, where we have the Python virtualenv - name: Run baseline run: | - BENCHMARKS=$(echo "${COMMENT_BODY}" | sed 
-nE 's/\/libcxx-bot benchmark (.+)/\1/p') - baseline_commit=$(git merge-base ${PULL_REQUEST_BASE} ${PULL_REQUEST_SHA}) - ./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${BENCHMARKS} + source .venv/bin/activate + baseline_commit=$(git -C repo merge-base ${{ steps.vars.outputs.pr_base }} ${{ steps.vars.outputs.pr_head }}) + ./repo/libcxx/utils/test-at-commit --git-repo repo --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} - name: Run candidate run: | - BENCHMARKS=$(echo "${COMMENT_BODY}" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') - ./libcxx/utils/test-at-commit --commit ${PULL_REQUEST_SHA} -B build/candidate -- -sv -j1 --param optimization=speed ${BENCHMARKS} + source .venv/bin/activate + ./repo/libcxx/utils/test-at-commit --git-repo repo --commit ${{ steps.vars.outputs.pr_head }} -B build/candidate -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} - name: Compare baseline and candidate runs - run: ./libcxx/utils/compare-benchmarks <(./libcxx/utils/consolidate-benchmarks build/baseline) \ - <(./libcxx/utils/consolidate-benchmarks build/candidate) + run: | + source .venv/bin/activate + ./repo/libcxx/utils/compare-benchmarks <(./repo/libcxx/utils/consolidate-benchmarks build/baseline) \ + <(./repo/libcxx/utils/consolidate-benchmarks build/candidate) From 7d249cf094357d729f866f8366765372daccbc49 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 11 Sep 2025 15:00:14 -0700 Subject: [PATCH 039/734] [RISCV] Add helper functions to detect CLZ/CTZ/CPOP-like support. 
(#158148) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 ++++++++-------- llvm/lib/Target/RISCV/RISCVSubtarget.h | 11 +++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 037eec05e4301..4f137756d2f48 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -403,12 +403,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, Legal); } - if (Subtarget.hasStdExtZbb() || - (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) { + if (Subtarget.hasCTZLike()) { if (Subtarget.is64Bit()) setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); } else { setOperationAction(ISD::CTTZ, XLenVT, Expand); + } + + if (!Subtarget.hasCPOPLike()) { // TODO: These should be set to LibCall, but this currently breaks // the Linux kernel build. See #101786. Lacks i128 tests, too. if (Subtarget.is64Bit()) @@ -418,8 +420,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::i64, Expand); } - if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || - (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) { + if (Subtarget.hasCLZLike()) { // We need the custom lowering to make sure that the resulting sequence // for the 32bit case is efficient on 64bit targets. // Use default promotion for i32 without Zbb. 
@@ -2158,13 +2159,11 @@ bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { } bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { - return Subtarget.hasStdExtZbb() || - (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()); + return Subtarget.hasCTZLike(); } bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { - return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || - (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()); + return Subtarget.hasCLZLike(); } bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( @@ -24843,6 +24842,7 @@ bool RISCVTargetLowering::isCtpopFast(EVT VT) const { return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) return true; + // FIXME: Should use hasCPOPLike here. return Subtarget.hasStdExtZbb() && (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 50e76df56e575..0d9cd16a77937 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -186,6 +186,17 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { return HasStdExtZfhmin || HasStdExtZfbfmin; } + bool hasCLZLike() const { + return HasStdExtZbb || HasVendorXTHeadBb || + (HasVendorXCVbitmanip && !IsRV64); + } + bool hasCTZLike() const { + return HasStdExtZbb || (HasVendorXCVbitmanip && !IsRV64); + } + bool hasCPOPLike() const { + return HasStdExtZbb || (HasVendorXCVbitmanip && !IsRV64); + } + bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; } bool hasCZEROLike() const { From c9395512e8b45dc1a341ae0061d75bbeeeb7a595 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 18:05:06 -0400 Subject: [PATCH 040/734] [libc++] Install dependencies right before they're needed This solves a tricky issue where we can't install the libc++ dependencies until after we've checked 
out the monorepo. --- .github/workflows/libcxx-run-benchmarks.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index 764686b0b4e09..992c5ea0c2dc0 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -34,17 +34,12 @@ jobs: with: python-version: '3.10' - - name: Install dependencies - run: | - python3 -m venv .venv - source .venv/bin/activate - python -m pip install -r libcxx/utils/requirements.txt - python -m pip install pygithub - - name: Extract information from the PR id: vars run: | + python3 -m venv .venv source .venv/bin/activate + python -m pip install pygithub cat <> ${GITHUB_OUTPUT} import github repo = github.Github("${{ github.token }}").get_repo("${{ github.repository }}") @@ -65,6 +60,7 @@ jobs: - name: Run baseline run: | source .venv/bin/activate + python -m pip install -r repo/libcxx/utils/requirements.txt baseline_commit=$(git -C repo merge-base ${{ steps.vars.outputs.pr_base }} ${{ steps.vars.outputs.pr_head }}) ./repo/libcxx/utils/test-at-commit --git-repo repo --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} From 1dfd1313e5e9e64e2355d5886b125c551b171b38 Mon Sep 17 00:00:00 2001 From: jtstogel Date: Thu, 11 Sep 2025 15:06:27 -0700 Subject: [PATCH 041/734] [bazel] Fix bazel test by specifying unroll-elements.mlir as data (#158158) `mlir/test/Dialect/Vector/td/unroll-elements.mlir` is fed as a data dependency into`mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir` added in [#157142](https://github.com/llvm/llvm-project/pull/157142). The Bazel rule here automatically picks up all mlir files as tests, which leads to `vector-to-elements-lowering` failing. 
--- utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel index b8d136c174bd4..daf639fbcee31 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel @@ -9,6 +9,7 @@ package(default_visibility = ["//visibility:public"]) name = "%s.test" % src, srcs = [src], data = [ + "Vector/td/unroll-elements.mlir", "Vector/vector-sink-transform.mlir", "//llvm:llvm-symbolizer", "//mlir:mlir-opt", @@ -33,6 +34,7 @@ package(default_visibility = ["//visibility:public"]) "LLVM/*-symbol-def.mlir", "Transform/*-symbol-decl-and-schedule.mlir", "Transform/include/**/*.mlir", + "Vector/td/unroll-elements.mlir", "Vector/vector-sink-transform.mlir", ], ) From d2f67c3bf735fe29a0c7fd1fac6939c4d6737b9a Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 18:13:07 -0400 Subject: [PATCH 042/734] [libc++] Make sure we forward the git repository from test-at-commit to build-at-commit --- libcxx/utils/test-at-commit | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libcxx/utils/test-at-commit b/libcxx/utils/test-at-commit index 1ef1ec0c52815..5b3fcede48ab2 100755 --- a/libcxx/utils/test-at-commit +++ b/libcxx/utils/test-at-commit @@ -70,7 +70,9 @@ def main(argv): with tempfile.TemporaryDirectory() as install_dir: # Build the library at the baseline - build_cmd = [os.path.join(PARENT_DIR, 'build-at-commit'), '--install-dir', install_dir, '--commit', args.commit] + build_cmd = [os.path.join(PARENT_DIR, 'build-at-commit'), '--git-repo', args.git_repo, + '--install-dir', install_dir, + '--commit', args.commit] build_cmd += ['--', '-DCMAKE_BUILD_TYPE=RelWithDebInfo'] subprocess.check_call(build_cmd) From 607a813029c815fb3529ae7338548cc3f492b437 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 
11 Sep 2025 23:16:37 +0100 Subject: [PATCH 043/734] [ConstFold] Don't crash on ConstantExprs when folding get_active_lane_m. Check if operands are ConstantInt to avoid crashing on constant expression after https://github.com/llvm/llvm-project/pull/156659. --- llvm/lib/Analysis/ConstantFolding.cpp | 6 +-- .../ConstProp/active-lane-mask.ll | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 07c6ba8ae7d9e..a3b2e62a1b8ba 100755 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -4253,9 +4253,9 @@ static Constant *ConstantFoldScalableVectorCall( return ConstantInt::getFalse(SVTy); } case Intrinsic::get_active_lane_mask: { - auto Op0 = cast(Operands[0])->getValue(); - auto Op1 = cast(Operands[1])->getValue(); - if (Op0.uge(Op1)) + auto *Op0 = dyn_cast(Operands[0]); + auto *Op1 = dyn_cast(Operands[1]); + if (Op0 && Op1 && Op0->getValue().uge(Op1->getValue())) return ConstantVector::getNullValue(SVTy); break; } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll index ed26deb58eae4..9de0c597305b0 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll @@ -3,6 +3,8 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +@glob = external global i32 + define <16 x i1> @v16i1_0() { ; CHECK-LABEL: @v16i1_0( ; CHECK-NEXT: entry: @@ -337,6 +339,41 @@ entry: ret %mask } + +define @nxv16i1_0_constexpr() { +; CHECK-LABEL: @nxv16i1_0_constexpr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 ptrtoint (ptr @glob to i64)) +; CHECK-NEXT: ret [[MASK]] +; +entry: + %mask = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 ptrtoint (ptr @glob to i64)) + ret %mask +} + 
+define @nxv16i1_constexpr_0() { +; CHECK-LABEL: @nxv16i1_constexpr_0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 0) +; CHECK-NEXT: ret [[MASK]] +; +entry: + %mask = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 0) + ret %mask +} + +define @nxv16i1_constexpr_constexpr() { +; CHECK-LABEL: @nxv16i1_constexpr_constexpr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @glob, i64 2) to i64)) +; CHECK-NEXT: ret [[MASK]] +; +entry: + %mask = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @glob, i64 2) to i64)) + ret %mask +} + + declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) From 23d1ec64f7fe630ac08a49652b649690971051f3 Mon Sep 17 00:00:00 2001 From: David Tellenbach Date: Fri, 12 Sep 2025 00:22:55 +0200 Subject: [PATCH 044/734] [AArch64][MIR] Serialize AArch64MachineFunctionInfo::HasStackFrame to MIR (#158122) This patch adds serialization of AArch64MachineFunctionInfo::HasStackFrame into MIR. 
--- .../AArch64/AArch64MachineFunctionInfo.cpp | 7 +++- .../AArch64/AArch64MachineFunctionInfo.h | 2 + .../CodeGen/MIR/AArch64/hasstackframe.mir | 41 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/MIR/AArch64/hasstackframe.mir diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index b4197a04840b7..a81f5b3d436a9 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -28,7 +28,10 @@ yaml::AArch64FunctionInfo::AArch64FunctionInfo( : HasRedZone(MFI.hasRedZone()), StackSizeSVE(MFI.hasCalculatedStackSizeSVE() ? std::optional(MFI.getStackSizeSVE()) - : std::nullopt) {} + : std::nullopt), + HasStackFrame(MFI.hasStackFrame() + ? std::optional(MFI.hasStackFrame()) + : std::nullopt) {} void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) { MappingTraits::mapping(YamlIO, *this); @@ -40,6 +43,8 @@ void AArch64FunctionInfo::initializeBaseYamlFields( HasRedZone = YamlMFI.HasRedZone; if (YamlMFI.StackSizeSVE) setStackSizeSVE(*YamlMFI.StackSizeSVE); + if (YamlMFI.HasStackFrame) + setHasStackFrame(*YamlMFI.HasStackFrame); } static std::pair GetSignReturnAddress(const Function &F) { diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 993cff112ba84..98fd018bf33a9 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -600,6 +600,7 @@ namespace yaml { struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo { std::optional HasRedZone; std::optional StackSizeSVE; + std::optional HasStackFrame; AArch64FunctionInfo() = default; AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI); @@ -612,6 +613,7 @@ template <> struct MappingTraits { static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) { 
YamlIO.mapOptional("hasRedZone", MFI.HasRedZone); YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE); + YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame); } }; diff --git a/llvm/test/CodeGen/MIR/AArch64/hasstackframe.mir b/llvm/test/CodeGen/MIR/AArch64/hasstackframe.mir new file mode 100644 index 0000000000000..bf3d8ec478d18 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AArch64/hasstackframe.mir @@ -0,0 +1,41 @@ +# RUN: llc -run-pass=prologepilog -mtriple arm64-apple-ios -o - -simplify-mir \ +# RUN: -verify-machineinstrs %s | FileCheck %s + +# CHECK: hasStackFrame: true + +--- | + + define i32 @f(i32 %a, i32 %b) #0 { + %local_array = alloca [10 x i32], align 4 + %temp = alloca i32, align 4 + store i32 %a, ptr %temp, align 4 + %loaded = load i32, ptr %temp, align 4 + %gep = getelementptr inbounds [10 x i32], ptr %local_array, i64 0, i64 5 + store i32 %loaded, ptr %gep, align 4 + %result = add i32 %loaded, %b + %blah = call i32 @foo(i32 noundef %result) + ret i32 %blah + } + + declare i32 @foo(i32 noundef) + +... +--- +name: f +frameInfo: + adjustsStack: true +stack: + - { id: 0, name: local_array, size: 40, alignment: 4, local-offset: -40 } + - { id: 1, name: temp, size: 4, alignment: 4, local-offset: -44 } +body: | + bb.0: + liveins: $w0, $w1 + + STRWui renamable $w0, %stack.1.temp, 0 + STRWui renamable $w0, %stack.0.local_array, 5 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $w0 = ADDWrr killed renamable $w0, killed renamable $w1 + BL @foo, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + RET_ReallyLR implicit $w0 +... 
From c989f85cdb77db88a16ec6e490d8f16312e6f1a6 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 21 Aug 2025 10:32:04 -0700 Subject: [PATCH 045/734] [MLIR] Apply clang-tidy fixes for performance-unnecessary-value-param in ValueBoundsOpInterface.cpp (NFC) --- .../mlir/Interfaces/ValueBoundsOpInterface.h | 20 ++++++++------- .../lib/Interfaces/ValueBoundsOpInterface.cpp | 25 ++++++++++--------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h index d168735f50598..58852239444b9 100644 --- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h +++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h @@ -217,7 +217,7 @@ class ValueBoundsConstraintSet /// `closedUB` is set to "true", upper bounds are also closed. static FailureOr computeConstantBound(presburger::BoundType type, const Variable &var, - StopConditionFn stopCondition = nullptr, + const StopConditionFn &stopCondition = nullptr, bool closedUB = false); /// Compute a constant delta between the given two values. Return "failure" @@ -282,18 +282,18 @@ class ValueBoundsConstraintSet /// /// Slice are non-overlapping if the above constraint is not satisfied for /// at least one dimension. - static FailureOr areOverlappingSlices(MLIRContext *ctx, - HyperrectangularSlice slice1, - HyperrectangularSlice slice2); + static FailureOr + areOverlappingSlices(MLIRContext *ctx, const HyperrectangularSlice &slice1, + const HyperrectangularSlice &slice2); /// Return "true" if the given slices are guaranteed to be equivalent. /// Return "false" if the given slices are guaranteed to be non-equivalent. /// Return "failure" if unknown. /// /// Slices are equivalent if their offsets, sizes and strices are equal. 
- static FailureOr areEquivalentSlices(MLIRContext *ctx, - HyperrectangularSlice slice1, - HyperrectangularSlice slice2); + static FailureOr + areEquivalentSlices(MLIRContext *ctx, const HyperrectangularSlice &slice1, + const HyperrectangularSlice &slice2); /// Add a bound for the given index-typed value or shaped value. This function /// returns a builder that adds the bound. @@ -326,7 +326,8 @@ class ValueBoundsConstraintSet /// An index-typed value or the dimension of a shaped-type value. using ValueDim = std::pair; - ValueBoundsConstraintSet(MLIRContext *ctx, StopConditionFn stopCondition, + ValueBoundsConstraintSet(MLIRContext *ctx, + const StopConditionFn &stopCondition, bool addConservativeSemiAffineBounds = false); /// Return "true" if, based on the current state of the constraint system, @@ -401,7 +402,8 @@ class ValueBoundsConstraintSet /// Insert the given affine map and its bound operands as a new column in the /// constraint system. Return the position of the new column. Any operands /// that were not analyzed yet are put on the worklist. - int64_t insert(AffineMap map, ValueDimList operands, bool isSymbol = true); + int64_t insert(AffineMap map, const ValueDimList &operands, + bool isSymbol = true); int64_t insert(const Variable &var, bool isSymbol = true); /// Project out the given column in the constraint set. 
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index caa909186eb2c..d2bafb701046e 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/IR/BuiltinTypes.h" @@ -151,7 +153,7 @@ ValueBoundsConstraintSet::Variable::Variable(AffineMap map, [](Value v) { return Variable(v); })) {} ValueBoundsConstraintSet::ValueBoundsConstraintSet( - MLIRContext *ctx, StopConditionFn stopCondition, + MLIRContext *ctx, const StopConditionFn &stopCondition, bool addConservativeSemiAffineBounds) : builder(ctx), stopCondition(stopCondition), addConservativeSemiAffineBounds(addConservativeSemiAffineBounds) { @@ -302,7 +304,8 @@ int64_t ValueBoundsConstraintSet::insert(bool isSymbol) { return pos; } -int64_t ValueBoundsConstraintSet::insert(AffineMap map, ValueDimList operands, +int64_t ValueBoundsConstraintSet::insert(AffineMap map, + const ValueDimList &operands, bool isSymbol) { assert(map.getNumResults() == 1 && "expected affine map with one result"); int64_t pos = insert(isSymbol); @@ -629,7 +632,7 @@ LogicalResult ValueBoundsConstraintSet::computeIndependentBound( FailureOr ValueBoundsConstraintSet::computeConstantBound( presburger::BoundType type, const Variable &var, - StopConditionFn stopCondition, bool closedUB) { + const StopConditionFn &stopCondition, bool closedUB) { // Default stop condition if none was specified: Keep adding constraints until // a bound could be computed. 
int64_t pos = 0; @@ -666,7 +669,7 @@ void ValueBoundsConstraintSet::populateConstraints(Value value, int64_t ValueBoundsConstraintSet::populateConstraints(AffineMap map, ValueDimList operands) { - int64_t pos = insert(map, operands, /*isSymbol=*/false); + int64_t pos = insert(map, std::move(operands), /*isSymbol=*/false); // Process the backward slice of `operands` (i.e., reverse use-def chain) // until `stopCondition` is met. processWorklist(); @@ -826,10 +829,9 @@ FailureOr ValueBoundsConstraintSet::areEqual(const Variable &var1, return strongCompare(var1, ComparisonOperator::EQ, var2); } -FailureOr -ValueBoundsConstraintSet::areOverlappingSlices(MLIRContext *ctx, - HyperrectangularSlice slice1, - HyperrectangularSlice slice2) { +FailureOr ValueBoundsConstraintSet::areOverlappingSlices( + MLIRContext *ctx, const HyperrectangularSlice &slice1, + const HyperrectangularSlice &slice2) { assert(slice1.getMixedOffsets().size() == slice2.getMixedOffsets().size() && "expected slices of same rank"); assert(slice1.getMixedSizes().size() == slice2.getMixedSizes().size() && @@ -891,10 +893,9 @@ ValueBoundsConstraintSet::areOverlappingSlices(MLIRContext *ctx, return true; } -FailureOr -ValueBoundsConstraintSet::areEquivalentSlices(MLIRContext *ctx, - HyperrectangularSlice slice1, - HyperrectangularSlice slice2) { +FailureOr ValueBoundsConstraintSet::areEquivalentSlices( + MLIRContext *ctx, const HyperrectangularSlice &slice1, + const HyperrectangularSlice &slice2) { assert(slice1.getMixedOffsets().size() == slice2.getMixedOffsets().size() && "expected slices of same rank"); assert(slice1.getMixedSizes().size() == slice2.getMixedSizes().size() && From d88c89f860c205cda2c07b59fbb9ede70130818f Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Thu, 11 Sep 2025 15:47:09 -0700 Subject: [PATCH 046/734] [DirectX] Removing dxbc StaticSampler from mcbxdc (#154631) MC Static Samplers Representation currently depends on Object structures. 
This PR removes that dependency and in order to facilitate removing to_underlying usage in follow-up PRs. --- llvm/include/llvm/BinaryFormat/DXContainer.h | 40 +++++ .../Frontend/HLSL/RootSignatureValidations.h | 4 - .../llvm/MC/DXContainerRootSignature.h | 18 ++- .../Frontend/HLSL/RootSignatureMetadata.cpp | 142 ++++++++---------- .../HLSL/RootSignatureValidations.cpp | 41 ----- llvm/lib/ObjectYAML/DXContainerEmitter.cpp | 26 +++- .../DXILPostOptimizationValidation.cpp | 2 +- .../RootSignature-StaticSamplers.yaml | 4 +- .../ObjectYAML/DXContainerYAMLTest.cpp | 4 +- 9 files changed, 142 insertions(+), 139 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h index facd137e9d9dd..c04380667a640 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainer.h +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -228,6 +228,16 @@ enum class SamplerFilter : uint32_t { #include "DXContainerConstants.def" }; +#define FILTER(Val, Enum) \ + case Val: \ + return true; +inline bool isValidSamplerFilter(uint32_t V) { + switch (V) { +#include "DXContainerConstants.def" + } + return false; +} + LLVM_ABI ArrayRef> getSamplerFilters(); #define TEXTURE_ADDRESS_MODE(Val, Enum) Enum = Val, @@ -237,6 +247,16 @@ enum class TextureAddressMode : uint32_t { LLVM_ABI ArrayRef> getTextureAddressModes(); +#define TEXTURE_ADDRESS_MODE(Val, Enum) \ + case Val: \ + return true; +inline bool isValidAddress(uint32_t V) { + switch (V) { +#include "DXContainerConstants.def" + } + return false; +} + #define COMPARISON_FUNC(Val, Enum) Enum = Val, enum class ComparisonFunc : uint32_t { #include "DXContainerConstants.def" @@ -244,11 +264,31 @@ enum class ComparisonFunc : uint32_t { LLVM_ABI ArrayRef> getComparisonFuncs(); +#define COMPARISON_FUNC(Val, Enum) \ + case Val: \ + return true; +inline bool isValidComparisonFunc(uint32_t V) { + switch (V) { +#include "DXContainerConstants.def" + } + return false; +} + #define STATIC_BORDER_COLOR(Val, 
Enum) Enum = Val, enum class StaticBorderColor : uint32_t { #include "DXContainerConstants.def" }; +#define STATIC_BORDER_COLOR(Val, Enum) \ + case Val: \ + return true; +inline bool isValidBorderColor(uint32_t V) { + switch (V) { +#include "DXContainerConstants.def" + } + return false; +} + LLVM_ABI ArrayRef> getStaticBorderColors(); LLVM_ABI PartType parsePartType(StringRef S); diff --git a/llvm/include/llvm/Frontend/HLSL/RootSignatureValidations.h b/llvm/include/llvm/Frontend/HLSL/RootSignatureValidations.h index 24e851933949f..ea96094b18300 100644 --- a/llvm/include/llvm/Frontend/HLSL/RootSignatureValidations.h +++ b/llvm/include/llvm/Frontend/HLSL/RootSignatureValidations.h @@ -34,12 +34,8 @@ LLVM_ABI bool verifyDescriptorRangeFlag(uint32_t Version, dxil::ResourceClass Type, dxbc::DescriptorRangeFlags FlagsVal); LLVM_ABI bool verifyNumDescriptors(uint32_t NumDescriptors); -LLVM_ABI bool verifySamplerFilter(uint32_t Value); -LLVM_ABI bool verifyAddress(uint32_t Address); LLVM_ABI bool verifyMipLODBias(float MipLODBias); LLVM_ABI bool verifyMaxAnisotropy(uint32_t MaxAnisotropy); -LLVM_ABI bool verifyComparisonFunc(uint32_t ComparisonFunc); -LLVM_ABI bool verifyBorderColor(uint32_t BorderColor); LLVM_ABI bool verifyLOD(float LOD); LLVM_ABI bool verifyBoundOffset(uint32_t Offset); diff --git a/llvm/include/llvm/MC/DXContainerRootSignature.h b/llvm/include/llvm/MC/DXContainerRootSignature.h index f2722fd37a4f1..54677ef70244f 100644 --- a/llvm/include/llvm/MC/DXContainerRootSignature.h +++ b/llvm/include/llvm/MC/DXContainerRootSignature.h @@ -60,6 +60,22 @@ struct DescriptorTable { } }; +struct StaticSampler { + dxbc::SamplerFilter Filter; + dxbc::TextureAddressMode AddressU; + dxbc::TextureAddressMode AddressV; + dxbc::TextureAddressMode AddressW; + float MipLODBias; + uint32_t MaxAnisotropy; + dxbc::ComparisonFunc ComparisonFunc; + dxbc::StaticBorderColor BorderColor; + float MinLOD; + float MaxLOD; + uint32_t ShaderRegister; + uint32_t RegisterSpace; + 
dxbc::ShaderVisibility ShaderVisibility; +}; + struct RootParametersContainer { SmallVector ParametersInfo; @@ -125,7 +141,7 @@ struct RootSignatureDesc { uint32_t StaticSamplersOffset = 0u; uint32_t NumStaticSamplers = 0u; mcdxbc::RootParametersContainer ParametersContainer; - SmallVector StaticSamplers; + SmallVector StaticSamplers; LLVM_ABI void write(raw_ostream &OS) const; diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp index 31605e3900341..f29f2c7602fc6 100644 --- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp +++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp @@ -52,13 +52,15 @@ static std::optional extractMdStringValue(MDNode *Node, return NodeText->getString(); } -static Expected -extractShaderVisibility(MDNode *Node, unsigned int OpId) { +template && + std::is_same_v, uint32_t>>> +Expected extractEnumValue(MDNode *Node, unsigned int OpId, StringRef ErrText, + llvm::function_ref VerifyFn) { if (std::optional Val = extractMdIntValue(Node, OpId)) { - if (!dxbc::isValidShaderVisibility(*Val)) - return make_error>( - "ShaderVisibility", *Val); - return dxbc::ShaderVisibility(*Val); + if (!VerifyFn(*Val)) + return make_error>(ErrText, *Val); + return static_cast(*Val); } return make_error("ShaderVisibility"); } @@ -233,7 +235,9 @@ Error MetadataParser::parseRootConstants(mcdxbc::RootSignatureDesc &RSD, return make_error("RootConstants Element"); Expected Visibility = - extractShaderVisibility(RootConstantNode, 1); + extractEnumValue(RootConstantNode, 1, + "ShaderVisibility", + dxbc::isValidShaderVisibility); if (auto E = Visibility.takeError()) return Error(std::move(E)); @@ -287,7 +291,9 @@ Error MetadataParser::parseRootDescriptors( } Expected Visibility = - extractShaderVisibility(RootDescriptorNode, 1); + extractEnumValue(RootDescriptorNode, 1, + "ShaderVisibility", + dxbc::isValidShaderVisibility); if (auto E = Visibility.takeError()) return Error(std::move(E)); @@ -380,7 +386,9 @@ 
Error MetadataParser::parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD, return make_error("Descriptor Table"); Expected Visibility = - extractShaderVisibility(DescriptorTableNode, 1); + extractEnumValue(DescriptorTableNode, 1, + "ShaderVisibility", + dxbc::isValidShaderVisibility); if (auto E = Visibility.takeError()) return Error(std::move(E)); @@ -406,26 +414,34 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD, if (StaticSamplerNode->getNumOperands() != 14) return make_error("Static Sampler"); - dxbc::RTS0::v1::StaticSampler Sampler; - if (std::optional Val = extractMdIntValue(StaticSamplerNode, 1)) - Sampler.Filter = *Val; - else - return make_error("Filter"); + mcdxbc::StaticSampler Sampler; - if (std::optional Val = extractMdIntValue(StaticSamplerNode, 2)) - Sampler.AddressU = *Val; - else - return make_error("AddressU"); + Expected Filter = extractEnumValue( + StaticSamplerNode, 1, "Filter", dxbc::isValidSamplerFilter); + if (auto E = Filter.takeError()) + return Error(std::move(E)); + Sampler.Filter = *Filter; - if (std::optional Val = extractMdIntValue(StaticSamplerNode, 3)) - Sampler.AddressV = *Val; - else - return make_error("AddressV"); + Expected AddressU = + extractEnumValue( + StaticSamplerNode, 2, "AddressU", dxbc::isValidAddress); + if (auto E = AddressU.takeError()) + return Error(std::move(E)); + Sampler.AddressU = *AddressU; - if (std::optional Val = extractMdIntValue(StaticSamplerNode, 4)) - Sampler.AddressW = *Val; - else - return make_error("AddressW"); + Expected AddressV = + extractEnumValue( + StaticSamplerNode, 3, "AddressV", dxbc::isValidAddress); + if (auto E = AddressV.takeError()) + return Error(std::move(E)); + Sampler.AddressV = *AddressV; + + Expected AddressW = + extractEnumValue( + StaticSamplerNode, 4, "AddressW", dxbc::isValidAddress); + if (auto E = AddressW.takeError()) + return Error(std::move(E)); + Sampler.AddressW = *AddressW; if (std::optional Val = extractMdFloatValue(StaticSamplerNode, 5)) 
Sampler.MipLODBias = *Val; @@ -437,15 +453,19 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD, else return make_error("MaxAnisotropy"); - if (std::optional Val = extractMdIntValue(StaticSamplerNode, 7)) - Sampler.ComparisonFunc = *Val; - else - return make_error("ComparisonFunc"); + Expected ComparisonFunc = + extractEnumValue( + StaticSamplerNode, 7, "ComparisonFunc", dxbc::isValidComparisonFunc); + if (auto E = ComparisonFunc.takeError()) + return Error(std::move(E)); + Sampler.ComparisonFunc = *ComparisonFunc; - if (std::optional Val = extractMdIntValue(StaticSamplerNode, 8)) - Sampler.BorderColor = *Val; - else - return make_error("ComparisonFunc"); + Expected BorderColor = + extractEnumValue( + StaticSamplerNode, 8, "BorderColor", dxbc::isValidBorderColor); + if (auto E = BorderColor.takeError()) + return Error(std::move(E)); + Sampler.BorderColor = *BorderColor; if (std::optional Val = extractMdFloatValue(StaticSamplerNode, 9)) Sampler.MinLOD = *Val; @@ -467,10 +487,13 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD, else return make_error("RegisterSpace"); - if (std::optional Val = extractMdIntValue(StaticSamplerNode, 13)) - Sampler.ShaderVisibility = *Val; - else - return make_error("ShaderVisibility"); + Expected Visibility = + extractEnumValue(StaticSamplerNode, 13, + "ShaderVisibility", + dxbc::isValidShaderVisibility); + if (auto E = Visibility.takeError()) + return Error(std::move(E)); + Sampler.ShaderVisibility = *Visibility; RSD.StaticSamplers.push_back(Sampler); return Error::success(); @@ -594,30 +617,7 @@ Error MetadataParser::validateRootSignature( } } - for (const dxbc::RTS0::v1::StaticSampler &Sampler : RSD.StaticSamplers) { - if (!hlsl::rootsig::verifySamplerFilter(Sampler.Filter)) - DeferredErrs = - joinErrors(std::move(DeferredErrs), - make_error>( - "Filter", Sampler.Filter)); - - if (!hlsl::rootsig::verifyAddress(Sampler.AddressU)) - DeferredErrs = - joinErrors(std::move(DeferredErrs), 
- make_error>( - "AddressU", Sampler.AddressU)); - - if (!hlsl::rootsig::verifyAddress(Sampler.AddressV)) - DeferredErrs = - joinErrors(std::move(DeferredErrs), - make_error>( - "AddressV", Sampler.AddressV)); - - if (!hlsl::rootsig::verifyAddress(Sampler.AddressW)) - DeferredErrs = - joinErrors(std::move(DeferredErrs), - make_error>( - "AddressW", Sampler.AddressW)); + for (const mcdxbc::StaticSampler &Sampler : RSD.StaticSamplers) { if (!hlsl::rootsig::verifyMipLODBias(Sampler.MipLODBias)) DeferredErrs = joinErrors(std::move(DeferredErrs), @@ -630,18 +630,6 @@ Error MetadataParser::validateRootSignature( make_error>( "MaxAnisotropy", Sampler.MaxAnisotropy)); - if (!hlsl::rootsig::verifyComparisonFunc(Sampler.ComparisonFunc)) - DeferredErrs = - joinErrors(std::move(DeferredErrs), - make_error>( - "ComparisonFunc", Sampler.ComparisonFunc)); - - if (!hlsl::rootsig::verifyBorderColor(Sampler.BorderColor)) - DeferredErrs = - joinErrors(std::move(DeferredErrs), - make_error>( - "BorderColor", Sampler.BorderColor)); - if (!hlsl::rootsig::verifyLOD(Sampler.MinLOD)) DeferredErrs = joinErrors(std::move(DeferredErrs), make_error>( @@ -663,12 +651,6 @@ Error MetadataParser::validateRootSignature( joinErrors(std::move(DeferredErrs), make_error>( "RegisterSpace", Sampler.RegisterSpace)); - - if (!dxbc::isValidShaderVisibility(Sampler.ShaderVisibility)) - DeferredErrs = - joinErrors(std::move(DeferredErrs), - make_error>( - "ShaderVisibility", Sampler.ShaderVisibility)); } return DeferredErrs; diff --git a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp index d682dda0bab26..0970977b5064f 100644 --- a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp +++ b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp @@ -115,27 +115,6 @@ bool verifyNumDescriptors(uint32_t NumDescriptors) { return NumDescriptors > 0; } -bool verifySamplerFilter(uint32_t Value) { - switch (Value) { -#define FILTER(Num, Val) case 
llvm::to_underlying(dxbc::SamplerFilter::Val): -#include "llvm/BinaryFormat/DXContainerConstants.def" - return true; - } - return false; -} - -// Values allowed here: -// https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_texture_address_mode#syntax -bool verifyAddress(uint32_t Address) { - switch (Address) { -#define TEXTURE_ADDRESS_MODE(Num, Val) \ - case llvm::to_underlying(dxbc::TextureAddressMode::Val): -#include "llvm/BinaryFormat/DXContainerConstants.def" - return true; - } - return false; -} - bool verifyMipLODBias(float MipLODBias) { return MipLODBias >= -16.f && MipLODBias <= 15.99f; } @@ -144,26 +123,6 @@ bool verifyMaxAnisotropy(uint32_t MaxAnisotropy) { return MaxAnisotropy <= 16u; } -bool verifyComparisonFunc(uint32_t ComparisonFunc) { - switch (ComparisonFunc) { -#define COMPARISON_FUNC(Num, Val) \ - case llvm::to_underlying(dxbc::ComparisonFunc::Val): -#include "llvm/BinaryFormat/DXContainerConstants.def" - return true; - } - return false; -} - -bool verifyBorderColor(uint32_t BorderColor) { - switch (BorderColor) { -#define STATIC_BORDER_COLOR(Num, Val) \ - case llvm::to_underlying(dxbc::StaticBorderColor::Val): -#include "llvm/BinaryFormat/DXContainerConstants.def" - return true; - } - return false; -} - bool verifyLOD(float LOD) { return !std::isnan(LOD); } bool verifyBoundOffset(uint32_t Offset) { diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp index 1078b1188bb66..73dfa9899d613 100644 --- a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp +++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp @@ -335,20 +335,30 @@ Error DXContainerWriter::writeParts(raw_ostream &OS) { } for (const auto &Param : P.RootSignature->samplers()) { - dxbc::RTS0::v1::StaticSampler NewSampler; - NewSampler.Filter = Param.Filter; - NewSampler.AddressU = Param.AddressU; - NewSampler.AddressV = Param.AddressV; - NewSampler.AddressW = Param.AddressW; + assert(dxbc::isValidSamplerFilter(Param.Filter) && + 
dxbc::isValidAddress(Param.AddressU) && + dxbc::isValidAddress(Param.AddressV) && + dxbc::isValidAddress(Param.AddressW) && + dxbc::isValidComparisonFunc(Param.ComparisonFunc) && + dxbc::isValidBorderColor(Param.BorderColor) && + dxbc::isValidShaderVisibility(Param.ShaderVisibility) && + "Invalid enum value in static sampler"); + + mcdxbc::StaticSampler NewSampler; + NewSampler.Filter = dxbc::SamplerFilter(Param.Filter); + NewSampler.AddressU = dxbc::TextureAddressMode(Param.AddressU); + NewSampler.AddressV = dxbc::TextureAddressMode(Param.AddressV); + NewSampler.AddressW = dxbc::TextureAddressMode(Param.AddressW); NewSampler.MipLODBias = Param.MipLODBias; NewSampler.MaxAnisotropy = Param.MaxAnisotropy; - NewSampler.ComparisonFunc = Param.ComparisonFunc; - NewSampler.BorderColor = Param.BorderColor; + NewSampler.ComparisonFunc = dxbc::ComparisonFunc(Param.ComparisonFunc); + NewSampler.BorderColor = dxbc::StaticBorderColor(Param.BorderColor); NewSampler.MinLOD = Param.MinLOD; NewSampler.MaxLOD = Param.MaxLOD; NewSampler.ShaderRegister = Param.ShaderRegister; NewSampler.RegisterSpace = Param.RegisterSpace; - NewSampler.ShaderVisibility = Param.ShaderVisibility; + NewSampler.ShaderVisibility = + dxbc::ShaderVisibility(Param.ShaderVisibility); RS.StaticSamplers.push_back(NewSampler); } diff --git a/llvm/lib/Target/DirectX/DXILPostOptimizationValidation.cpp b/llvm/lib/Target/DirectX/DXILPostOptimizationValidation.cpp index 28d4dd64e8945..7e93474e73118 100644 --- a/llvm/lib/Target/DirectX/DXILPostOptimizationValidation.cpp +++ b/llvm/lib/Target/DirectX/DXILPostOptimizationValidation.cpp @@ -214,7 +214,7 @@ static void validateRootSignature(Module &M, } } - for (const dxbc::RTS0::v1::StaticSampler &S : RSD.StaticSamplers) + for (const mcdxbc::StaticSampler &S : RSD.StaticSamplers) Builder.trackBinding(dxil::ResourceClass::Sampler, S.RegisterSpace, S.ShaderRegister, S.ShaderRegister, &S); diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml 
b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml index 82d9a4ffdb4f8..888a32b351690 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml @@ -20,7 +20,7 @@ Parts: StaticSamplersOffset: 24 Parameters: [] Samplers: - - Filter: 10 + - Filter: 16 AddressU: 1 AddressV: 2 AddressW: 5 @@ -46,7 +46,7 @@ Parts: #CHECK-NEXT: StaticSamplersOffset: 24 #CHECK-NEXT: Parameters: [] #CHECK-NEXT: Samplers: -#CHECK-NEXT: - Filter: 10 +#CHECK-NEXT: - Filter: 16 #CHECK-NEXT: AddressU: 1 #CHECK-NEXT: AddressV: 2 #CHECK-NEXT: AddressW: 5 diff --git a/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp b/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp index 4cf8f61e83c8d..a264ca7c3c3f6 100644 --- a/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp +++ b/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp @@ -492,7 +492,7 @@ TEST(RootSignature, ParseStaticSamplers) { StaticSamplersOffset: 24 Parameters: [] Samplers: - - Filter: 10 + - Filter: 16 AddressU: 1 AddressV: 2 AddressW: 5 @@ -517,7 +517,7 @@ TEST(RootSignature, ParseStaticSamplers) { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x52, 0x54, 0x53, 0x30, 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xa4, 0x70, 0x9d, 0x3f, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0xeb, 0x91, 0x40, 0x66, 0x66, 0x0e, 0x41, From b0181514b4d2a5f61ae5b405ee32643e6b8ff71b Mon Sep 17 00:00:00 2001 From: "Henrik G. 
Olsson" Date: Thu, 11 Sep 2025 15:49:30 -0700 Subject: [PATCH 047/734] [Utils] Compare true file locations instead of string paths (#158160) Previously we compared paths by string manipulation, however Windows paths can use both '\' and '/' as path separators, which made this fragile. This uses the pathlib.Path.samefile API instead. --- llvm/utils/lit/lit/DiffUpdater.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/utils/lit/lit/DiffUpdater.py b/llvm/utils/lit/lit/DiffUpdater.py index 5bba2d70991df..fefcdcc99f3f2 100644 --- a/llvm/utils/lit/lit/DiffUpdater.py +++ b/llvm/utils/lit/lit/DiffUpdater.py @@ -1,6 +1,7 @@ import shutil import os import shlex +import pathlib """ This file provides the `diff_test_updater` function, which is invoked on failed RUN lines when lit is executed with --update-tests. @@ -76,14 +77,12 @@ def get_target_dir(commands, test_path): @staticmethod def create(path, commands, test_path, target_dir): - filename = path.replace(target_dir, "") - if filename.startswith(os.sep): - filename = filename[len(os.sep) :] + path = pathlib.Path(path) with open(test_path, "r") as f: lines = f.readlines() for i, l in enumerate(lines): p = SplitFileTarget._get_split_line_path(l) - if p == filename: + if p and path.samefile(os.path.join(target_dir, p)): idx = i break else: From 50da22a9f4394de964eceb23097e1d05cc4dcdb6 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Thu, 11 Sep 2025 15:51:02 -0700 Subject: [PATCH 048/734] [alpha.webkit.UncountedCallArgsChecker] A return value can be erroneously treated as unsafe if it's a template parameter (#157993) When a template class takes Ref as a template parameter and this template parameter is used as the return value of a member function, the return value can be treated as unsafe (i.e. emits a false positive). The issue was caused by getCanonicalType sometimes converting Ref to T. 
Workaround this problem by avoid emitting a warning when the original, non-canonical type is a safe pointer type. --- .../Checkers/WebKit/ASTUtils.cpp | 18 ++++++++++++++++ .../WebKit/template-wrapper-call-arg.cpp | 21 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 clang/test/Analysis/Checkers/WebKit/template-wrapper-call-arg.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 3fc10385885a3..6f13d552b4b44 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -161,6 +161,24 @@ bool tryToFindPtrOrigin( Name == "NSClassFromString") return callback(E, true); } + + // Sometimes, canonical type erroneously turns Ref into T. + // Workaround this problem by checking again if the original type was + // a SubstTemplateTypeParmType of a safe smart pointer type (e.g. Ref). + if (auto *CalleeDecl = call->getCalleeDecl()) { + if (auto *FD = dyn_cast(CalleeDecl)) { + auto RetType = FD->getReturnType(); + if (auto *Subst = dyn_cast(RetType)) { + if (auto *SubstType = Subst->desugar().getTypePtr()) { + if (auto *RD = dyn_cast(SubstType)) { + if (auto *CXX = dyn_cast(RD->getOriginalDecl())) + if (isSafePtr(CXX)) + return callback(E, true); + } + } + } + } + } } if (auto *ObjCMsgExpr = dyn_cast(E)) { if (auto *Method = ObjCMsgExpr->getMethodDecl()) { diff --git a/clang/test/Analysis/Checkers/WebKit/template-wrapper-call-arg.cpp b/clang/test/Analysis/Checkers/WebKit/template-wrapper-call-arg.cpp new file mode 100644 index 0000000000000..b0ff210f9415e --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/template-wrapper-call-arg.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s +// expected-no-diagnostics + +#include "mock-types.h" + +struct Obj { + void ref() const; + void deref() const; + + void someFunction(); +}; + +template class 
Wrapper { +public: + T obj(); +}; + +static void foo(Wrapper>&& wrapper) +{ + wrapper.obj()->someFunction(); +} From 580fdeb6ff55fcd54be16ed8555eaaa6a9aee1c0 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 11 Sep 2025 15:55:11 -0700 Subject: [PATCH 049/734] [lldb] Test global variable support of dwim-print (NFC) (#157908) DIL has made `frame variable` support global variables, which in turn means dwim-print inherits support for global variables. --- lldb/test/API/commands/dwim-print/TestDWIMPrint.py | 10 +++++++++- lldb/test/API/commands/dwim-print/main.cpp | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py index 492d49f008a9e..82ff59f74f41f 100644 --- a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py +++ b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py @@ -16,7 +16,7 @@ def _run_cmd(self, cmd: str) -> str: self.ci.HandleCommand(cmd, result) return result.GetOutput().rstrip() - VAR_IDENT = re.compile(r"(?:\$\d+|[\w.]+) = ") + VAR_IDENT = re.compile(r"(?:\$\d+|(?:::)?[\w.]+) = ") def _strip_result_var(self, string: str) -> str: """ @@ -185,3 +185,11 @@ def test_direct_child_access(self): self, "break inside", lldb.SBFileSpec("main.cpp") ) self._expect_cmd("dwim-print number", "frame variable") + + def test_global_variables(self): + """Test dwim-print supports global variables.""" + self.build() + lldbutil.run_to_source_breakpoint( + self, "break here", lldb.SBFileSpec("main.cpp") + ) + self._expect_cmd("dwim-print gGlobal", "frame variable") diff --git a/lldb/test/API/commands/dwim-print/main.cpp b/lldb/test/API/commands/dwim-print/main.cpp index d1abb5a85dd45..5b7cbd7da764b 100644 --- a/lldb/test/API/commands/dwim-print/main.cpp +++ b/lldb/test/API/commands/dwim-print/main.cpp @@ -1,5 +1,8 @@ extern "C" int puts(const char *s); +extern int gGlobal; +int gGlobal = 23; + struct Structure { int number = 30; void f() { puts("break 
inside"); } From aa32b6f294838d10bdb36c76678b68b7322c2e4e Mon Sep 17 00:00:00 2001 From: Vincent Lee Date: Thu, 11 Sep 2025 16:17:41 -0700 Subject: [PATCH 050/734] [tablegen][test] Make additional-encoding.td more reliable (#158123) Similar to `OPC_Decode`, `OPC_TryDecode` should also be relaxed in case new opcodes are added. `llvm/test/TableGen/trydecode-emission.td` is an example of a test that follows this pattern. Apply the same relaxation in `additional-encoding.td` as well. --- .../FixedLenDecoderEmitter/additional-encoding.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/additional-encoding.td b/llvm/test/TableGen/FixedLenDecoderEmitter/additional-encoding.td index ec7e35e1ecac7..47c9335f6cdf2 100644 --- a/llvm/test/TableGen/FixedLenDecoderEmitter/additional-encoding.td +++ b/llvm/test/TableGen/FixedLenDecoderEmitter/additional-encoding.td @@ -35,22 +35,22 @@ class I : Instruction { // CHECK-NEXT: /* 7 */ MCD::OPC_Scope, 8, 0, // Skip to: 18 // CHECK-NEXT: /* 10 */ MCD::OPC_CheckField, 6, 6, 0, // CHECK-NEXT: /* 14 */ MCD::OPC_Decode, {{[0-9]+}}, 2, 0, // Opcode: {{.*}}:NOP, DecodeIdx: 0 -// CHECK-NEXT: /* 18 */ MCD::OPC_TryDecode, 187, 2, 1, +// CHECK-NEXT: /* 18 */ MCD::OPC_TryDecode, {{[0-9]+}}, 2, 1, // CHECK-NEXT: /* 22 */ MCD::OPC_FilterValueOrSkip, 1, 15, 0, // Skip to: 41 // CHECK-NEXT: /* 26 */ MCD::OPC_Scope, 8, 0, // Skip to: 37 // CHECK-NEXT: /* 29 */ MCD::OPC_CheckField, 6, 6, 0, // CHECK-NEXT: /* 33 */ MCD::OPC_Decode, {{[0-9]+}}, 2, 0, // Opcode: {{.*}}:NOP, DecodeIdx: 0 -// CHECK-NEXT: /* 37 */ MCD::OPC_TryDecode, 188, 2, 1, +// CHECK-NEXT: /* 37 */ MCD::OPC_TryDecode, {{[0-9]+}}, 2, 1, // CHECK-NEXT: /* 41 */ MCD::OPC_FilterValueOrSkip, 2, 15, 0, // Skip to: 60 // CHECK-NEXT: /* 45 */ MCD::OPC_Scope, 8, 0, // Skip to: 56 // CHECK-NEXT: /* 48 */ MCD::OPC_CheckField, 6, 6, 0, // CHECK-NEXT: /* 52 */ MCD::OPC_Decode, {{[0-9]+}}, 2, 0, // Opcode: {{.*}}:NOP, DecodeIdx: 0 -// 
CHECK-NEXT: /* 56 */ MCD::OPC_TryDecode, 189, 2, 1, +// CHECK-NEXT: /* 56 */ MCD::OPC_TryDecode, {{[0-9]+}}, 2, 1, // CHECK-NEXT: /* 60 */ MCD::OPC_FilterValue, 3, // CHECK-NEXT: /* 62 */ MCD::OPC_Scope, 8, 0, // Skip to: 73 // CHECK-NEXT: /* 65 */ MCD::OPC_CheckField, 6, 6, 0, // CHECK-NEXT: /* 69 */ MCD::OPC_Decode, {{[0-9]+}}, 2, 0, // Opcode: {{.*}}:NOP, DecodeIdx: 0 -// CHECK-NEXT: /* 73 */ MCD::OPC_TryDecode, 190, 2, 1, +// CHECK-NEXT: /* 73 */ MCD::OPC_TryDecode, {{[0-9]+}}, 2, 1, class SHIFT opc> : I<(outs), (ins ShAmtOp:$shamt)>, EncSHIFT; From 5d8d98cfa8e98a61b1d7d91e7e941c7ff289fed1 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 18:53:56 -0400 Subject: [PATCH 051/734] [libc++] Report the results of the benchmarking job back through a comment --- .github/workflows/libcxx-run-benchmarks.yml | 53 +++++++++++++++++---- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index 992c5ea0c2dc0..1055a567f9c5b 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -10,7 +10,7 @@ name: Benchmark libc++ permissions: - contents: read # Default everything to read-only + contents: read on: issue_comment: @@ -24,6 +24,9 @@ env: jobs: run-benchmarks: + permissions: + pull-requests: write + if: >- github.event.issue.pull_request && contains(github.event.comment.body, '/libcxx-bot benchmark') @@ -40,6 +43,7 @@ jobs: python3 -m venv .venv source .venv/bin/activate python -m pip install pygithub + cat <> ${GITHUB_OUTPUT} import github repo = github.Github("${{ github.token }}").get_repo("${{ github.repository }}") @@ -59,18 +63,47 @@ jobs: - name: Run baseline run: | - source .venv/bin/activate - python -m pip install -r repo/libcxx/utils/requirements.txt - baseline_commit=$(git -C repo merge-base ${{ steps.vars.outputs.pr_base }} ${{ steps.vars.outputs.pr_head }}) - ./repo/libcxx/utils/test-at-commit 
--git-repo repo --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} + source .venv/bin/activate && cd repo + python -m pip install -r libcxx/utils/requirements.txt + baseline_commit=$(git merge-base ${{ steps.vars.outputs.pr_base }} ${{ steps.vars.outputs.pr_head }}) + ./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} - name: Run candidate run: | - source .venv/bin/activate - ./repo/libcxx/utils/test-at-commit --git-repo repo --commit ${{ steps.vars.outputs.pr_head }} -B build/candidate -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} + source .venv/bin/activate && cd repo + ./libcxx/utils/test-at-commit --commit ${{ steps.vars.outputs.pr_head }} -B build/candidate -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} - name: Compare baseline and candidate runs run: | - source .venv/bin/activate - ./repo/libcxx/utils/compare-benchmarks <(./repo/libcxx/utils/consolidate-benchmarks build/baseline) \ - <(./repo/libcxx/utils/consolidate-benchmarks build/candidate) + source .venv/bin/activate && cd repo + ./libcxx/utils/compare-benchmarks <(./libcxx/utils/consolidate-benchmarks build/baseline) \ + <(./libcxx/utils/consolidate-benchmarks build/candidate) > results.txt + + - name: Update comment with results + run: | + source .venv/bin/activate && cd repo + cat < + + Benchmark results: + + + \`\`\` + {benchmark_results} + \`\`\` + + + """ + + comment.edit(new_comment_text) + EOF From 13ae9ea4d66b53d836c465c4330e3ccdba0d01d0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 12 Sep 2025 00:36:06 +0100 Subject: [PATCH 052/734] [MLIR] Avoid resolving callable outside the analysis scope in DeadCodeAnalysis (#155088) We are using the symbol table machinery to lookup for a callable, but when the analysis scope if a function, such lookup will resolve outside of 
the scope. This can lead to race-condition issues since other passes may operate in parallel on the sibling functions. The callable would be discarded right after the lookup (we check the analysis scope), so avoiding the lookup is NFC. For the DataFlow solver, we're looking at the top-level operation, and if it isn't a SymbolTable we disable the interprocedural optimization in the solver config directly. This strategy isn't NFC but seems reasonnable and does not encounter any change in behavior in practice in tree. Fix #154948 --- .../mlir/Analysis/DataFlow/DeadCodeAnalysis.h | 7 ++++ .../Analysis/DataFlow/DeadCodeAnalysis.cpp | 37 ++++++++++++++----- mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp | 22 +++++++---- mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp | 10 +++-- mlir/lib/Analysis/DataFlowFramework.cpp | 7 ++++ 5 files changed, 62 insertions(+), 21 deletions(-) diff --git a/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h index 2250db823b551..c7c405e1423cb 100644 --- a/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h +++ b/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h @@ -229,6 +229,13 @@ class DeadCodeAnalysis : public DataFlowAnalysis { /// considered an external callable. Operation *analysisScope; + /// Whether the analysis scope has a symbol table. This is used to avoid + /// resolving callables outside the analysis scope. + /// It is updated when recursing into a region in case where the top-level + /// operation does not have a symbol table, but one is encountered in a nested + /// region. + bool hasSymbolTable = false; + /// A symbol table used for O(1) symbol lookups during simplification. 
SymbolTableCollection symbolTable; }; diff --git a/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp index 9424eff3e6b6f..131c49c44171b 100644 --- a/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp @@ -22,6 +22,7 @@ #include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLog.h" @@ -159,6 +160,7 @@ void DeadCodeAnalysis::initializeSymbolCallables(Operation *top) { LDBG() << "[init] Entering initializeSymbolCallables for top-level op: " << OpWithFlags(top, OpPrintingFlags().skipRegions()); analysisScope = top; + hasSymbolTable = top->hasTrait(); auto walkFn = [&](Operation *symTable, bool allUsesVisible) { LDBG() << "[init] Processing symbol table op: " << OpWithFlags(symTable, OpPrintingFlags().skipRegions()); @@ -260,14 +262,25 @@ LogicalResult DeadCodeAnalysis::initializeRecursively(Operation *op) { return failure(); } // Recurse on nested operations. - for (Region ®ion : op->getRegions()) { - LDBG() << "[init] Recursing into region of op: " - << OpWithFlags(op, OpPrintingFlags().skipRegions()); - for (Operation &nestedOp : region.getOps()) { - LDBG() << "[init] Recursing into nested op: " - << OpWithFlags(&nestedOp, OpPrintingFlags().skipRegions()); - if (failed(initializeRecursively(&nestedOp))) - return failure(); + if (op->getNumRegions()) { + // If we haven't seen a symbol table yet, check if the current operation + // has one. If so, update the flag to allow for resolving callables in + // nested regions. 
+ bool savedHasSymbolTable = hasSymbolTable; + auto restoreHasSymbolTable = + llvm::make_scope_exit([&]() { hasSymbolTable = savedHasSymbolTable; }); + if (!hasSymbolTable && op->hasTrait()) + hasSymbolTable = true; + + for (Region ®ion : op->getRegions()) { + LDBG() << "[init] Recursing into region of op: " + << OpWithFlags(op, OpPrintingFlags().skipRegions()); + for (Operation &nestedOp : region.getOps()) { + LDBG() << "[init] Recursing into nested op: " + << OpWithFlags(&nestedOp, OpPrintingFlags().skipRegions()); + if (failed(initializeRecursively(&nestedOp))) + return failure(); + } } } LDBG() << "[init] Finished initializeRecursively for op: " @@ -388,7 +401,13 @@ LogicalResult DeadCodeAnalysis::visit(ProgramPoint *point) { void DeadCodeAnalysis::visitCallOperation(CallOpInterface call) { LDBG() << "visitCallOperation: " << OpWithFlags(call.getOperation(), OpPrintingFlags().skipRegions()); - Operation *callableOp = call.resolveCallableInTable(&symbolTable); + + Operation *callableOp = nullptr; + if (hasSymbolTable) + callableOp = call.resolveCallableInTable(&symbolTable); + else + LDBG() + << "No symbol table present in analysis scope, can't resolve callable"; // A call to a externally-defined callable has unknown predecessors. const auto isExternalCallable = [this](Operation *op) { diff --git a/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp b/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp index d05374f667a51..b51465bc31ec3 100644 --- a/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp @@ -64,10 +64,12 @@ void AbstractDenseForwardDataFlowAnalysis::visitCallOperation( AbstractDenseLattice *after) { // Allow for customizing the behavior of calls to external symbols, including // when the analysis is explicitly marked as non-interprocedural. 
- auto callable = - dyn_cast_if_present(call.resolveCallable()); - if (!getSolverConfig().isInterprocedural() || - (callable && !callable.getCallableRegion())) { + auto isExternalCallable = [&]() { + auto callable = + dyn_cast_if_present(call.resolveCallable()); + return callable && !callable.getCallableRegion(); + }; + if (!getSolverConfig().isInterprocedural() || isExternalCallable()) { return visitCallControlFlowTransfer( call, CallControlFlowAction::ExternalCallee, before, after); } @@ -290,6 +292,12 @@ AbstractDenseBackwardDataFlowAnalysis::visit(ProgramPoint *point) { void AbstractDenseBackwardDataFlowAnalysis::visitCallOperation( CallOpInterface call, const AbstractDenseLattice &after, AbstractDenseLattice *before) { + // If the solver is not interprocedural, let the hook handle it as an external + // callee. + if (!getSolverConfig().isInterprocedural()) + return visitCallControlFlowTransfer( + call, CallControlFlowAction::ExternalCallee, after, before); + // Find the callee. Operation *callee = call.resolveCallableInTable(&symbolTable); @@ -297,12 +305,10 @@ void AbstractDenseBackwardDataFlowAnalysis::visitCallOperation( // No region means the callee is only declared in this module. // If that is the case or if the solver is not interprocedural, // let the hook handle it. 
- if (!getSolverConfig().isInterprocedural() || - (callable && (!callable.getCallableRegion() || - callable.getCallableRegion()->empty()))) { + if (callable && + (!callable.getCallableRegion() || callable.getCallableRegion()->empty())) return visitCallControlFlowTransfer( call, CallControlFlowAction::ExternalCallee, after, before); - } if (!callable) return setToExitState(before); diff --git a/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp b/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp index 13a3e1480c836..0d2e2ed85549d 100644 --- a/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp @@ -228,10 +228,12 @@ LogicalResult AbstractSparseForwardDataFlowAnalysis::visitCallOperation( ArrayRef resultLattices) { // If the call operation is to an external function, attempt to infer the // results from the call arguments. - auto callable = - dyn_cast_if_present(call.resolveCallable()); - if (!getSolverConfig().isInterprocedural() || - (callable && !callable.getCallableRegion())) { + auto isExternalCallable = [&]() { + auto callable = + dyn_cast_if_present(call.resolveCallable()); + return callable && !callable.getCallableRegion(); + }; + if (!getSolverConfig().isInterprocedural() || isExternalCallable()) { visitExternalCallImpl(call, operandLattices, resultLattices); return success(); } diff --git a/mlir/lib/Analysis/DataFlowFramework.cpp b/mlir/lib/Analysis/DataFlowFramework.cpp index 7e1b4052027d3..9352ab02f7472 100644 --- a/mlir/lib/Analysis/DataFlowFramework.cpp +++ b/mlir/lib/Analysis/DataFlowFramework.cpp @@ -9,6 +9,7 @@ #include "mlir/Analysis/DataFlowFramework.h" #include "mlir/IR/Location.h" #include "mlir/IR/Operation.h" +#include "mlir/IR/SymbolTable.h" #include "mlir/IR/Value.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/iterator.h" @@ -109,6 +110,12 @@ LogicalResult DataFlowSolver::initializeAndRun(Operation *top) { isRunning = true; auto guard = llvm::make_scope_exit([&]() { isRunning = false; }); + bool 
isInterprocedural = config.isInterprocedural(); + auto restoreInterprocedural = llvm::make_scope_exit( + [&]() { config.setInterprocedural(isInterprocedural); }); + if (isInterprocedural && !top->hasTrait()) + config.setInterprocedural(false); + // Initialize equivalent lattice anchors. for (DataFlowAnalysis &analysis : llvm::make_pointee_range(childAnalyses)) { analysis.initializeEquivalentLatticeAnchor(top); From 1083289c78d5851293f32ccebd4931d18dcf943e Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 19:46:03 -0400 Subject: [PATCH 053/734] [libc++] Tee the output of the benchmark comparison --- .github/workflows/libcxx-run-benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index 1055a567f9c5b..be02b809fc6b7 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -77,7 +77,7 @@ jobs: run: | source .venv/bin/activate && cd repo ./libcxx/utils/compare-benchmarks <(./libcxx/utils/consolidate-benchmarks build/baseline) \ - <(./libcxx/utils/consolidate-benchmarks build/candidate) > results.txt + <(./libcxx/utils/consolidate-benchmarks build/candidate) | tee results.txt - name: Update comment with results run: | From 1c325a07f804e5c0f36fd18b48cd1e35d835fb32 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 08:56:34 +0900 Subject: [PATCH 054/734] AMDGPU: Stop checking allocatable in adjustAllocatableRegClass (#158105) This no longer does anything. 
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 398c99b3bd127..6762079dd632d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5975,12 +5975,10 @@ SIInstrInfo::getWholeWaveFunctionSetup(MachineFunction &MF) const { static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, - const MCInstrDesc &TID, unsigned RCID, - bool IsAllocatable) { - if ((IsAllocatable || !ST.hasGFX90AInsts()) && - (((TID.mayLoad() || TID.mayStore()) && - !(TID.TSFlags & SIInstrFlags::Spill)) || - (TID.TSFlags & SIInstrFlags::MIMG))) { + const MCInstrDesc &TID, unsigned RCID) { + if (!ST.hasGFX90AInsts() && (((TID.mayLoad() || TID.mayStore()) && + !(TID.TSFlags & SIInstrFlags::Spill)) || + (TID.TSFlags & SIInstrFlags::MIMG))) { switch (RCID) { case AMDGPU::AV_32RegClassID: RCID = AMDGPU::VGPR_32RegClassID; @@ -6020,7 +6018,7 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, return RI.getRegClass(RegClass); } - return adjustAllocatableRegClass(ST, RI, TID, RegClass, false); + return adjustAllocatableRegClass(ST, RI, TID, RegClass); } const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, @@ -6039,7 +6037,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, } unsigned RCID = Desc.operands()[OpNo].RegClass; - return adjustAllocatableRegClass(ST, RI, Desc, RCID, true); + return adjustAllocatableRegClass(ST, RI, Desc, RCID); } void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { From 28743fafa6b5358ede23da93f3ca7d52d1b4f75c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 08:57:22 +0900 Subject: [PATCH 055/734] AMDGPU: Use RegisterOperand for MIMG vaddr classes (#158087) This reduces the diff in a future patch. 
--- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 114 +++++++++++---------- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 20 +++- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 +- 3 files changed, 78 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index bf787b230067d..291c03ab223a8 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -272,13 +272,13 @@ class MIMGNSAHelper +class PartialNSAHelper : NSAHelper<> { - list addr_types = + list addr_types = !if(!ge(num_addrs, max_addr), - !listconcat(!listsplat(VGPR_32, !sub(max_addr, 1)), [LastAddrRC]), - !listsplat(VGPR_32, num_addrs)); + !listconcat(!listsplat(VGPROp_32, !sub(max_addr, 1)), [LastAddrRC]), + !listsplat(VGPROp_32, num_addrs)); int VAddrCount = !if(!gt(num_addrs, max_addr), max_addr, num_addrs); list AddrAsmNames = !foreach(i, !range(VAddrCount), "vaddr" # i); @@ -359,7 +359,7 @@ class MIMG_gfx11 // Note that 1-dword addresses always use non-NSA variants. 
class MIMG_nsa_gfx11 addr_types=[], - RegisterClass LastAddrRC = VGPR_32> + RegisterOperand LastAddrRC = VGPROp_32> : MIMG, MIMGe_gfx11 { let SubtargetPredicate = isGFX11Only; let AssemblerPredicate = isGFX11Only; @@ -400,7 +400,7 @@ class VIMAGE_gfx12 + RegisterOperand Addr3RC> : VSAMPLE, VSAMPLEe { let SubtargetPredicate = isGFX12Plus; let AssemblerPredicate = isGFX12Plus; @@ -421,7 +421,7 @@ class VSAMPLE_gfx12 : MIMG_gfx6789 { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc, @@ -434,10 +434,10 @@ class MIMG_NoSampler_Helper : MIMG_gfx90a .ret:$vdata), dns> { - let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc, + let InOperandList = !con((ins getAlign2RegOp.ret:$vaddr, SReg_256_XNULL:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); @@ -446,7 +446,7 @@ class MIMG_NoSampler_Helper_gfx90a : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask, @@ -471,7 +471,7 @@ class MIMG_NoSampler_nsa_gfx10 : MIMG_gfx11 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask, @@ -508,7 +508,7 @@ class VIMAGE_NoSampler_gfx12 : VSAMPLE_gfx12 { let InOperandList = !con(AddrIns, @@ -525,7 +525,7 @@ class VSAMPLE_Sampler_gfx12 : VSAMPLE_gfx12 { let InOperandList = !con(AddrIns, @@ -550,16 +550,16 @@ multiclass MIMG_NoSampler_Src_Helper ; if !not(ExtendedImageInst) then - def _V1_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V1_gfx10 : MIMG_NoSampler_gfx10; } if op.HAS_GFX11 then { - def _V1_gfx11 : MIMG_NoSampler_gfx11; } } @@ -576,14 +576,14 @@ multiclass MIMG_NoSampler_Src_Helper ; + def _V2 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then - def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V2_gfx10 : MIMG_NoSampler_gfx10; + def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a ; + def _V2_gfx10 : MIMG_NoSampler_gfx10; def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } if op.HAS_GFX11 then { 
- def _V2_gfx11 : MIMG_NoSampler_gfx11; + def _V2_gfx11 : MIMG_NoSampler_gfx11; def _V2_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; } } @@ -600,14 +600,14 @@ multiclass MIMG_NoSampler_Src_Helper ; + def _V3 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then - def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V3_gfx10 : MIMG_NoSampler_gfx10; + def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a ; + def _V3_gfx10 : MIMG_NoSampler_gfx10; def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } if op.HAS_GFX11 then { - def _V3_gfx11 : MIMG_NoSampler_gfx11; + def _V3_gfx11 : MIMG_NoSampler_gfx11; def _V3_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; } } @@ -624,15 +624,15 @@ multiclass MIMG_NoSampler_Src_Helper ; + def _V4 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then - def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V4_gfx10 : MIMG_NoSampler_gfx10; + def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a ; + def _V4_gfx10 : MIMG_NoSampler_gfx10; def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } if op.HAS_GFX11 then { - def _V4_gfx11 : MIMG_NoSampler_gfx11; + def _V4_gfx11 : MIMG_NoSampler_gfx11; def _V4_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; } @@ -640,7 +640,7 @@ multiclass MIMG_NoSampler_Src_Helper ; } else { @@ -1128,7 +1128,7 @@ multiclass MIMG_Atomic_Renamed ; class MIMG_Sampler_Helper + RegisterOperand src_rc, string dns=""> : MIMG_gfx6789 { let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, @@ -1139,7 +1139,7 @@ class MIMG_Sampler_Helper + RegisterOperand src_rc, string dns=""> : MIMG_gfx90a { let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, @@ -1164,7 +1164,7 @@ class MIMG_Sampler_Asm_gfx10p { } class MIMG_Sampler_gfx10 : MIMG_gfx10 { let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1180,7 +1180,7 @@ class MIMG_Sampler_nsa_gfx10 : MIMG_gfx10 { let InOperandList = 
MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1200,7 +1200,7 @@ class MIMG_Sampler_nortn_nsa_gfx10 : MIMG_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1209,7 +1209,7 @@ class MIMG_Sampler_gfx11 + RegisterOperand LastVAddrSize, string dns=""> : MIMG_nsa_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p.ret; @@ -1217,7 +1217,7 @@ class MIMG_Sampler_nsa_gfx11 : MIMG_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1227,7 +1227,7 @@ class MIMG_Sampler_nortn_gfx11 + RegisterOperand LastVAddrSize, string dns=""> : MIMG_nsa_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p.ret; let AsmString = MIMG_Sampler_Asm_gfx10p.ret; @@ -1237,20 +1237,20 @@ class MIMG_Sampler_nortn_nsa_gfx11 { int NumWords = dw; - RegisterClass RegClass = !if(!le(AddrDW, 0), ?, - !if(!eq(AddrDW, 1), VGPR_32, - !if(!eq(AddrDW, 2), VReg_64, - !if(!eq(AddrDW, 3), VReg_96, - !if(!eq(AddrDW, 4), VReg_128, - !if(!eq(AddrDW, 5), VReg_160, - !if(!eq(AddrDW, 6), VReg_192, - !if(!eq(AddrDW, 7), VReg_224, - !if(!eq(AddrDW, 8), VReg_256, - !if(!eq(AddrDW, 9), VReg_288, - !if(!eq(AddrDW, 10), VReg_320, - !if(!eq(AddrDW, 11), VReg_352, - !if(!eq(AddrDW, 12), VReg_384, - !if(!le(AddrDW, 16), VReg_512, ?)))))))))))))); + RegisterOperand RegClass = !if(!le(AddrDW, 0), ?, + !if(!eq(AddrDW, 1), VGPROp_32, + !if(!eq(AddrDW, 2), VGPROp_64, + !if(!eq(AddrDW, 3), VGPROp_96, + !if(!eq(AddrDW, 4), VGPROp_128, + !if(!eq(AddrDW, 5), VGPROp_160, + !if(!eq(AddrDW, 6), VGPROp_192, + !if(!eq(AddrDW, 7), VGPROp_224, + !if(!eq(AddrDW, 8), VGPROp_256, + !if(!eq(AddrDW, 9), VGPROp_288, + !if(!eq(AddrDW, 10), VGPROp_320, + !if(!eq(AddrDW, 11), VGPROp_352, + !if(!eq(AddrDW, 12), VGPROp_384, + !if(!le(AddrDW, 16), VGPROp_512, ?)))))))))))))); // Whether the instruction variant with this vaddr size should be enabled for // the auto-generated disassembler. 
@@ -1514,8 +1514,10 @@ multiclass MIMG_Gather_WQM class MIMG_IntersectRay_Helper { int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11))); - RegisterClass RegClass = MIMGAddrSize.RegClass; - int VAddrDwords = !srl(RegClass.Size, 5); + RegisterOperand RegClass = MIMGAddrSize.RegClass; + + defvar Size = !cast(RegClass.RegClass).Size; + int VAddrDwords = !srl(Size, 5); int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); @@ -1526,7 +1528,7 @@ class MIMG_IntersectRay_Helper { true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); } -class MIMG_IntersectRay_gfx10 +class MIMG_IntersectRay_gfx10 : MIMG_gfx10 { let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16); let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16"; @@ -1540,7 +1542,7 @@ class MIMG_IntersectRay_nsa_gfx10 let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16"; } -class MIMG_IntersectRay_gfx11 +class MIMG_IntersectRay_gfx11 : MIMG_gfx11 { let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16); let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16"; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index aa5dae09ca185..c8231b470abae 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2596,24 +2596,42 @@ class getAlign2RegOp { RegisterOperand ret = !cond(!eq(RC, VGPROp_16) : VGPROp_16, !eq(RC, VGPROp_32) : VGPROp_32, + !eq(RC, VGPROp_64) : VGPROp_64_Align2, !eq(RC, VGPROp_64_Align1) : VGPROp_64_Align2, + !eq(RC, VGPROp_64_Align2) : VGPROp_64_Align2, + !eq(RC, VGPROp_96) : VGPROp_96_Align2, !eq(RC, VGPROp_96_Align1) : VGPROp_96_Align2, + !eq(RC, VGPROp_96_Align2) : VGPROp_96_Align2, + !eq(RC, VGPROp_128) : VGPROp_128_Align2, !eq(RC, VGPROp_128_Align1) : VGPROp_128_Align2, + !eq(RC, VGPROp_128_Align2) : VGPROp_128_Align2, + !eq(RC, VGPROp_160) : VGPROp_160_Align2, !eq(RC, VGPROp_160_Align1) : 
VGPROp_160_Align2, + !eq(RC, VGPROp_160_Align2) : VGPROp_160_Align2, + !eq(RC, VGPROp_1024) : VGPROp_1024_Align2, !eq(RC, VGPROp_1024_Align1) : VGPROp_1024_Align2, + !eq(RC, VGPROp_1024_Align2) : VGPROp_1024_Align2, + !eq(RC, AVLdSt_32) : AVLdSt_32, + !eq(RC, AVLdSt_64_Align1) : AVLdSt_64_Align2, !eq(RC, AVLdSt_64) : AVLdSt_64_Align2, + !eq(RC, AVLdSt_96) : AVLdSt_96_Align2, + !eq(RC, AVLdSt_96_Align1) : AVLdSt_96_Align1, !eq(RC, AVLdSt_96_Align1) : AVLdSt_96_Align2, + !eq(RC, AVLdSt_128) : AVLdSt_128_Align2, !eq(RC, AVLdSt_128_Align1) : AVLdSt_128_Align2, + !eq(RC, AVLdSt_128_Align2) : AVLdSt_128_Align2, + !eq(RC, AVLdSt_160) : AVLdSt_160_Align2, - !eq(RC, AVLdSt_160_Align1) : AVLdSt_160_Align2); + !eq(RC, AVLdSt_160_Align1) : AVLdSt_160_Align2, + !eq(RC, AVLdSt_160_Align2) : AVLdSt_160_Align2); } class getEquivalentAGPROperand { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 5f5eec49bab06..31dd6b9e8d84d 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1327,7 +1327,7 @@ def VGPROp_16 : VGPROp { } def VGPROp_32 : VGPROp; -foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "512", "1024"] in { +foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "320", "352", "384", "512", "1024"] in { def VGPROp_#size : VGPROp("VReg_"#size)>; } From 5a21128f24a7f9a48166ae4a0aafe5bd70154012 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 08:57:47 +0900 Subject: [PATCH 056/734] AMDGPU: Relax legal register operand constraint (#157989) Find a common subclass instead of directly checking for a subclass relationship. This fixes folding logic for unaligned register defs into aligned use contexts. e.g., a vreg_64 def into an av_64_align2 use should be able to find the common subclass vreg_align2. This avoids regressions in future patches. 
Checking the subclass was also redundant on the subregister path; getMatchingSuperRegClass is sufficient. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 +-- .../AMDGPU/GlobalISel/vni8-across-blocks.ll | 22 +++---- .../test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll | 63 ++++++++++--------- llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir | 10 ++- 4 files changed, 51 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6762079dd632d..23a124fecddad 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6125,12 +6125,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); if (!SuperRC) return false; - - DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); - if (!DRC) - return false; + return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr; } - return RC->hasSuperClassEq(DRC); + + return RI.getCommonSubClass(DRC, RC) != nullptr; } bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll index 9c2fabce4bcde..b33b8a7d8cd72 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll @@ -7,33 +7,33 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) ; GFX906-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX906-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX906-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX906-NEXT: v_mov_b32_e32 v4, 8 +; GFX906-NEXT: v_mov_b32_e32 v3, 8 ; GFX906-NEXT: v_mov_b32_e32 v5, 16 ; GFX906-NEXT: s_waitcnt lgkmcnt(0) -; GFX906-NEXT: global_load_dword v3, v2, s[0:1] +; GFX906-NEXT: global_load_dword v4, v2, s[0:1] ; GFX906-NEXT: v_mov_b32_e32 v1, 0xff ; GFX906-NEXT: 
v_cmp_gt_u32_e32 vcc, 15, v0 ; GFX906-NEXT: s_waitcnt vmcnt(0) -; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v3 -; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX906-NEXT: v_or3_b32 v3, v6, v7, v3 +; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v4 +; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX906-NEXT: v_lshlrev_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX906-NEXT: v_or3_b32 v4, v6, v7, v4 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX906-NEXT: s_cbranch_execz .LBB0_2 ; GFX906-NEXT: ; %bb.1: ; %bb.1 ; GFX906-NEXT: global_load_dword v0, v2, s[2:3] ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX906-NEXT: v_or3_b32 v3, v2, v3, v0 +; GFX906-NEXT: v_or3_b32 v4, v2, v3, v0 ; GFX906-NEXT: .LBB0_2: ; %bb.2 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v4 ; GFX906-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; GFX906-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX906-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX906-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX906-NEXT: v_and_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:DWORD ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll index 7b33374453010..6b6eb43baf856 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll @@ -969,37 +969,38 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: s_mov_b64 s[0:1], 0x50 -; GFX950-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[0:1] ; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[0:1] +; GFX950-NEXT: ; def v[4:5] ; GFX950-NEXT: ;;#ASMEND -; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB14_2 ; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global ; GFX950-NEXT: buffer_wbl2 sc0 sc1 -; GFX950-NEXT: flat_atomic_swap_x2 v[2:3], v[4:5], v[0:1] sc0 sc1 +; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: buffer_inv sc0 sc1 +; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 ; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX950-NEXT: .LBB14_2: ; %Flow ; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB14_4 ; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private -; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX950-NEXT: scratch_load_dwordx2 v[2:3], v4, off +; GFX950-NEXT: 
v_cndmask_b32_e32 v2, -1, v2, vcc +; GFX950-NEXT: scratch_load_dwordx2 v[0:1], v2, off ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX950-NEXT: scratch_store_dwordx2 v2, v[4:5], off ; GFX950-NEXT: .LBB14_4: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: s_waitcnt vmcnt(1) ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; use v[2:3] +; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: s_setpc_b64 s[30:31] @@ -1058,37 +1059,38 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: s_mov_b64 s[0:1], 0x50 -; GFX950-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[0:1] ; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[0:1] +; GFX950-NEXT: ; def v[4:5] ; GFX950-NEXT: ;;#ASMEND -; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB15_2 ; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global ; GFX950-NEXT: buffer_wbl2 sc0 sc1 -; GFX950-NEXT: flat_atomic_swap_x2 v[2:3], v[4:5], v[0:1] sc0 sc1 +; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: buffer_inv sc0 sc1 +; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 ; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX950-NEXT: .LBB15_2: ; %Flow ; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB15_4 ; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private -; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 
v4, -1, v4, vcc -; GFX950-NEXT: scratch_load_dwordx2 v[2:3], v4, off +; GFX950-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc +; GFX950-NEXT: scratch_load_dwordx2 v[0:1], v2, off ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX950-NEXT: scratch_store_dwordx2 v2, v[4:5], off ; GFX950-NEXT: .LBB15_4: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: s_waitcnt vmcnt(1) ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; use v[2:3] +; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: s_setpc_b64 s[30:31] @@ -1149,11 +1151,11 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: s_mov_b64 s[0:1], 0x50 -; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[0:1] +; GFX950-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] ; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3 +; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[0:1] +; GFX950-NEXT: ; def v[2:3] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: ; implicit-def: $agpr0_agpr1 ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc @@ -1161,22 +1163,23 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 { ; GFX950-NEXT: s_cbranch_execz .LBB16_2 ; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global ; GFX950-NEXT: buffer_wbl2 sc0 sc1 -; GFX950-NEXT: flat_atomic_swap_x2 v[2:3], v[2:3], v[0:1] sc0 sc1 +; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: buffer_inv sc0 sc1 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v2 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v3 ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 +; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 +; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: .LBB16_2: ; %Flow ; GFX950-NEXT: s_andn2_saveexec_b64 
s[0:1], s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB16_4 ; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private -; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc -; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v2, off +; GFX950-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v0, off ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: scratch_store_dwordx2 v2, v[0:1], off +; GFX950-NEXT: scratch_store_dwordx2 v0, v[2:3], off ; GFX950-NEXT: .LBB16_4: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: s_waitcnt vmcnt(1) diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir index 103c3e3eb8bc6..e1295d4a09563 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir @@ -17,9 +17,8 @@ body: | ... # GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg -# GCN: %0:sreg_64 = IMPLICIT_DEF -# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0 -# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0 +# GCN: %0:sreg_64_xexec = IMPLICIT_DEF +# GCN-NEXT: S_STORE_DWORD_IMM %0.sub0, undef $sgpr10_sgpr11, 0, 0 name: fold_sgpr_to_sgpr_copy_subreg body: | @@ -32,9 +31,8 @@ body: | ... 
# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2 -# GCN: %0:sreg_64 = IMPLICIT_DEF -# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0 -# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0 +# GCN: %0:sreg_64_xexec = IMPLICIT_DEF +# GCN-NEXT: S_STORE_DWORD_IMM %0.sub0, undef $sgpr10_sgpr11, 0, 0 name: fold_sgpr_to_sgpr_copy_subreg2 body: | From 98d14ad50e42f36a7e02b126ca3fb8b15d39a73b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Sep 2025 20:20:09 -0400 Subject: [PATCH 057/734] [libc++] Improve output of the comment-triggered benchmarking job --- .github/workflows/libcxx-run-benchmarks.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index be02b809fc6b7..5714600b63a5e 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -67,17 +67,18 @@ jobs: python -m pip install -r libcxx/utils/requirements.txt baseline_commit=$(git merge-base ${{ steps.vars.outputs.pr_base }} ${{ steps.vars.outputs.pr_head }}) ./libcxx/utils/test-at-commit --commit ${baseline_commit} -B build/baseline -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} + ./libcxx/utils/consolidate-benchmarks build/baseline | tee baseline.lnt - name: Run candidate run: | source .venv/bin/activate && cd repo ./libcxx/utils/test-at-commit --commit ${{ steps.vars.outputs.pr_head }} -B build/candidate -- -sv -j1 --param optimization=speed ${{ steps.vars.outputs.benchmarks }} + ./libcxx/utils/consolidate-benchmarks build/candidate | tee candidate.lnt - name: Compare baseline and candidate runs run: | source .venv/bin/activate && cd repo - ./libcxx/utils/compare-benchmarks <(./libcxx/utils/consolidate-benchmarks build/baseline) \ - <(./libcxx/utils/consolidate-benchmarks build/candidate) | tee results.txt + ./libcxx/utils/compare-benchmarks baseline.lnt candidate.lnt | tee results.txt - name: Update comment 
with results run: | From 6040c007e3dd3339299786eae78246036d68d954 Mon Sep 17 00:00:00 2001 From: jtstogel Date: Thu, 11 Sep 2025 17:30:11 -0700 Subject: [PATCH 058/734] [bazel] Fix diff-test-update bazel test target by depending on split-file (#158170) #157765 added tests that depend on the split-file utility, which breaks the Bazel test target. --- .../bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel index d89626a6ee9e6..7146cdc53a29f 100644 --- a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel @@ -31,6 +31,7 @@ expand_template( "//llvm:FileCheck", "//llvm:count", "//llvm:not", + "//llvm:split-file", ] + glob(["Inputs/**"]), ) for src in glob( From 69e3ff67ac69f3a1177910cffefce19034563b7c Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Fri, 12 Sep 2025 01:42:08 +0100 Subject: [PATCH 059/734] [libc++] Fix ranges_rotate.pass.cpp complexity checks (#158144) The complexity is "at most N swaps" _for each invocation of `rotate`_, but the tests currently assert that the total number of swaps for N calls is at most N. The standard allows that to be N squared, so the test is either requiring more than the standard (and the comment in the test) promises, or somebody just forgot to reset the counter on each iteration. 
--- .../alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp index 5f594400e8321..574e96dea46a0 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp @@ -173,6 +173,7 @@ constexpr bool test() { auto end = adl::Iterator::TrackSwaps(in.data() + in.size(), swaps); for (std::size_t mid = 0; mid != input.size(); ++mid) { + swaps = 0; std::ranges::rotate(begin, begin + mid, end); assert(swaps <= expected); } @@ -186,6 +187,7 @@ constexpr bool test() { auto range = std::ranges::subrange(begin, end); for (std::size_t mid = 0; mid != input.size(); ++mid) { + swaps = 0; std::ranges::rotate(range, begin + mid); assert(swaps <= expected); } From 2740e4b73682eb7a6869c333991a608304938952 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 11 Sep 2025 17:51:47 -0700 Subject: [PATCH 060/734] [clang] Remove shell requirements from tests Most of these tests do not actually have a shell requirement. The shell requirement ended up in the test either from cargo culting (from what I can tell) or because the test authors actually meant to mark Windows as unsupported. This prevents enablement of lit's internal shell within clang. Towards #102699. 
Reviewers: rnk, efriedma-quic, Sirraide, petrhosek, ilovepi Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/156905 --- clang/test/Analysis/scan-build/cxx-name.test | 2 -- clang/test/Analysis/scan-build/deduplication.test | 2 -- clang/test/Analysis/scan-build/html_output.test | 2 -- clang/test/Analysis/scan-build/plist_html_output.test | 2 -- clang/test/Analysis/scan-build/plist_output.test | 2 -- clang/test/ClangScanDeps/module-format.c | 2 -- clang/test/ClangScanDeps/modules-context-hash-cwd.c | 4 ++-- clang/test/ClangScanDeps/modules-file-path-isolation.c | 3 ++- clang/test/ClangScanDeps/modules-in-stable-dirs.c | 4 +++- clang/test/ClangScanDeps/modules-symlink-dir-from-module.c | 2 +- clang/test/ClangScanDeps/modules-symlink-dir-vfs.c | 2 +- clang/test/ClangScanDeps/modules-symlink-dir.c | 4 ++-- clang/test/ClangScanDeps/prebuilt-modules-in-stable-dirs.c | 6 ++++-- clang/test/ClangScanDeps/subframework_header_dir_symlink.m | 2 +- clang/test/ClangScanDeps/symlink.cpp | 3 ++- clang/test/CodeCompletion/included-symlinks.cpp | 2 +- clang/test/Driver/aarch64-toolchain-extra.c | 3 +-- clang/test/Driver/amdgpu-hip-system-arch.c | 3 ++- clang/test/Driver/amdgpu-openmp-system-arch-fail.c | 3 ++- clang/test/Driver/arm-toolchain-extra.c | 1 - clang/test/Driver/baremetal-multilib-layered.yaml | 1 - clang/test/Driver/baremetal-multilib.yaml | 1 - clang/test/Driver/baremetal-sysroot.cpp | 1 - clang/test/Driver/darwin-ld-demangle-lld.c | 1 - clang/test/Driver/darwin-ld-lto-lld.c | 2 -- clang/test/Driver/mingw-sysroot.cpp | 1 - clang/test/Driver/no-canonical-prefixes.c | 2 +- clang/test/Driver/nvptx-cuda-system-arch.c | 2 +- clang/test/Driver/openmp-system-arch.c | 3 ++- clang/test/Driver/parse-progname.c | 3 ++- clang/test/Driver/riscv32-toolchain-extra.c | 3 +-- clang/test/Driver/riscv64-toolchain-extra.c | 1 - clang/test/Driver/sigpipe-handling.c | 2 +- clang/test/Driver/target-override.c | 3 +-- clang/test/Driver/verbose-output-quoting.c | 2 +- 
clang/test/Frontend/dependency-gen-symlink.c | 2 +- clang/test/Index/preamble-reparse-changed-module.m | 2 +- clang/test/InterfaceStubs/driver-test.c | 1 - clang/test/InterfaceStubs/driver-test2.c | 1 - clang/test/InterfaceStubs/driver-test3.c | 1 - clang/test/Modules/crash-vfs-headermaps.m | 2 +- clang/test/Modules/crash-vfs-include-pch.m | 2 +- clang/test/Modules/crash-vfs-path-emptydir-entries.m | 3 ++- clang/test/Modules/crash-vfs-path-symlink-topheader.m | 2 +- clang/test/Modules/embed-files-compressed.cpp | 1 - clang/test/Modules/embed-files.cpp | 2 +- clang/test/Modules/exponential-paths.cpp | 2 -- clang/test/Modules/framework-name.m | 3 ++- clang/test/Modules/implicit-private-without-public.m | 1 - clang/test/Modules/inferred-framework-case.m | 2 +- clang/test/Modules/module-file-modified.c | 1 - clang/test/Modules/module-symlink.m | 2 +- clang/test/Modules/modulemap-collision.m | 3 ++- clang/test/Modules/validate-file-content.m | 2 -- clang/test/PCH/validate-file-content.m | 2 -- clang/test/Preprocessor/embed_zos.c | 2 +- clang/test/Preprocessor/nonportable-include-with-hmap.c | 3 ++- clang/test/Profile/cxx-hash-v2.cpp | 2 -- .../warn-unsafe-buffer-usage-debug-unclaimed.cpp | 1 - .../test/Tooling/auto-detect-from-source-parent-of-cwd.cpp | 4 ++-- clang/test/Tooling/clang-check-pwd.cpp | 2 ++ llvm/utils/lit/lit/llvm/config.py | 3 +++ 62 files changed, 58 insertions(+), 78 deletions(-) diff --git a/clang/test/Analysis/scan-build/cxx-name.test b/clang/test/Analysis/scan-build/cxx-name.test index 483762d619d17..b602cb5c5231c 100644 --- a/clang/test/Analysis/scan-build/cxx-name.test +++ b/clang/test/Analysis/scan-build/cxx-name.test @@ -1,5 +1,3 @@ -REQUIRES: shell - RUN: %scan-build sh -c 'echo "CLANG_CXX=/$(basename "$CLANG_CXX")/"' | FileCheck %s Check that scan-build sets the CLANG_CXX environment variable (meant to be diff --git a/clang/test/Analysis/scan-build/deduplication.test b/clang/test/Analysis/scan-build/deduplication.test index 
2ec3061701fce..067a5153d67db 100644 --- a/clang/test/Analysis/scan-build/deduplication.test +++ b/clang/test/Analysis/scan-build/deduplication.test @@ -1,5 +1,3 @@ -REQUIRES: shell - RUN: rm -rf %t.output_dir && mkdir %t.output_dir RUN: %scan-build -o %t.output_dir \ RUN: %clang -S %S/Inputs/deduplication/1.c \ diff --git a/clang/test/Analysis/scan-build/html_output.test b/clang/test/Analysis/scan-build/html_output.test index c2b509d9ef661..1eb4e73611cf2 100644 --- a/clang/test/Analysis/scan-build/html_output.test +++ b/clang/test/Analysis/scan-build/html_output.test @@ -1,5 +1,3 @@ -REQUIRES: shell - RUN: rm -rf %t.output_dir && mkdir %t.output_dir RUN: %scan-build -o %t.output_dir %clang -S %S/Inputs/single_null_dereference.c \ RUN: | FileCheck %s -check-prefix CHECK-STDOUT diff --git a/clang/test/Analysis/scan-build/plist_html_output.test b/clang/test/Analysis/scan-build/plist_html_output.test index ca9c5256b9d75..b995aa6d5d36a 100644 --- a/clang/test/Analysis/scan-build/plist_html_output.test +++ b/clang/test/Analysis/scan-build/plist_html_output.test @@ -1,5 +1,3 @@ -REQUIRES: shell - RUN: rm -rf %t.output_dir && mkdir %t.output_dir RUN: %scan-build -plist-html -o %t.output_dir %clang -S %S/Inputs/single_null_dereference.c \ RUN: | FileCheck %s -check-prefix CHECK-STDOUT diff --git a/clang/test/Analysis/scan-build/plist_output.test b/clang/test/Analysis/scan-build/plist_output.test index 4d01640bff6ea..1e7bef1035b51 100644 --- a/clang/test/Analysis/scan-build/plist_output.test +++ b/clang/test/Analysis/scan-build/plist_output.test @@ -1,5 +1,3 @@ -REQUIRES: shell - RUN: rm -rf %t.output_dir && mkdir %t.output_dir RUN: %scan-build -plist -o %t.output_dir %clang -S %S/Inputs/single_null_dereference.c \ RUN: | FileCheck %s -check-prefix CHECK-STDOUT diff --git a/clang/test/ClangScanDeps/module-format.c b/clang/test/ClangScanDeps/module-format.c index 0a6abec80dd90..acfe195c4e080 100644 --- a/clang/test/ClangScanDeps/module-format.c +++ 
b/clang/test/ClangScanDeps/module-format.c @@ -6,8 +6,6 @@ // section in XCOFF yet. // UNSUPPORTED: target={{.*}}-aix{{.*}} -// REQUIRES: shell - // RUN: rm -rf %t && mkdir %t // RUN: cp %S/Inputs/modules-pch/* %t diff --git a/clang/test/ClangScanDeps/modules-context-hash-cwd.c b/clang/test/ClangScanDeps/modules-context-hash-cwd.c index c609a7dcbc80e..b5086ed409223 100644 --- a/clang/test/ClangScanDeps/modules-context-hash-cwd.c +++ b/clang/test/ClangScanDeps/modules-context-hash-cwd.c @@ -1,7 +1,7 @@ +// Most likely platform specific sed differences +// UNSUPPORTED: system-windows // Test current directory pruning when computing the context hash. -// REQUIRES: shell - // RUN: rm -rf %t // RUN: split-file %s %t // RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json diff --git a/clang/test/ClangScanDeps/modules-file-path-isolation.c b/clang/test/ClangScanDeps/modules-file-path-isolation.c index 2bd0a58ca9ae6..55784cf41700e 100644 --- a/clang/test/ClangScanDeps/modules-file-path-isolation.c +++ b/clang/test/ClangScanDeps/modules-file-path-isolation.c @@ -3,7 +3,8 @@ // Note: the spelling of the modulemap path still depends on the includer, since // that is the only source of information about it. -// REQUIRES: shell +// Needs symlinks +// UNSUPPORTED: system-windows // RUN: rm -rf %t // RUN: split-file %s %t diff --git a/clang/test/ClangScanDeps/modules-in-stable-dirs.c b/clang/test/ClangScanDeps/modules-in-stable-dirs.c index 066c5445f41f4..f54e09fecee94 100644 --- a/clang/test/ClangScanDeps/modules-in-stable-dirs.c +++ b/clang/test/ClangScanDeps/modules-in-stable-dirs.c @@ -1,3 +1,6 @@ +// Most likely platform specific sed differences +// UNSUPPORTED: system-windows + // This test verifies modules that are entirely comprised from stable directory inputs are captured in // dependency information. @@ -5,7 +8,6 @@ // The second compilation verifies that external paths are resolved when a // vfsoverlay for determining is-in-stable-directories. 
-// REQUIRES: shell // RUN: rm -rf %t // RUN: split-file %s %t // RUN: sed -e "s|DIR|%/t|g" %t/compile-commands.json.in > %t/compile-commands.json diff --git a/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c index 5f0ebc13eb2ee..85f5f1acc3793 100644 --- a/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c +++ b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c @@ -3,7 +3,7 @@ // module below does not transitively import Mod via a symlink, so it should not // see the symlinked path. -// REQUIRES: shell +// REQUIRES: symlinks // RUN: rm -rf %t // RUN: split-file %s %t diff --git a/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c b/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c index f2e5758aa41fb..eb49ab90c4d18 100644 --- a/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c +++ b/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c @@ -5,7 +5,7 @@ // RUN: rm -rf %t // RUN: split-file %s %t -// REQUIRES: shell +// REQUIRES: symlinks // RUN: mkdir -p %t/frameworks-symlink // RUN: ln -s %t/frameworks/FW.framework %t/frameworks-symlink/FW.framework diff --git a/clang/test/ClangScanDeps/modules-symlink-dir.c b/clang/test/ClangScanDeps/modules-symlink-dir.c index 35e830e8c6c57..9946b57c6680f 100644 --- a/clang/test/ClangScanDeps/modules-symlink-dir.c +++ b/clang/test/ClangScanDeps/modules-symlink-dir.c @@ -1,8 +1,8 @@ +// REQUIRES: symlinks + // Check that we canonicalize the module map path without changing the module // directory, which would break header lookup. 
-// REQUIRES: shell - // RUN: rm -rf %t // RUN: split-file %s %t // RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json diff --git a/clang/test/ClangScanDeps/prebuilt-modules-in-stable-dirs.c b/clang/test/ClangScanDeps/prebuilt-modules-in-stable-dirs.c index acacda1466b59..39b2863d966c3 100644 --- a/clang/test/ClangScanDeps/prebuilt-modules-in-stable-dirs.c +++ b/clang/test/ClangScanDeps/prebuilt-modules-in-stable-dirs.c @@ -1,3 +1,6 @@ +/// Most likely platform specific sed differences +// UNSUPPORTED: system-windows + /// This test validates that modules that depend on prebuilt modules /// resolve `is-in-stable-directories` correctly. /// The steps are: @@ -5,8 +8,7 @@ /// that is seemingly from the sysroot. However, it depends on a local header that is overlaid. /// 2. Build the PCH & dependency PCMs. /// 3. Scan a source file that transitively depends on the same modules as the pcm. - -// REQUIRES: shell + // RUN: rm -rf %t // RUN: split-file %s %t // RUN: sed -e "s|DIR|%/t|g" %t/overlay.json.template > %t/overlay.json diff --git a/clang/test/ClangScanDeps/subframework_header_dir_symlink.m b/clang/test/ClangScanDeps/subframework_header_dir_symlink.m index 3bbc5320d4e0c..66ff9df80527b 100644 --- a/clang/test/ClangScanDeps/subframework_header_dir_symlink.m +++ b/clang/test/ClangScanDeps/subframework_header_dir_symlink.m @@ -1,4 +1,4 @@ -// REQUIRES: shell +// REQUIRES: symlinks // RUN: rm -rf %t.dir // RUN: rm -rf %t.cdb // RUN: mkdir -p %t.dir diff --git a/clang/test/ClangScanDeps/symlink.cpp b/clang/test/ClangScanDeps/symlink.cpp index d262f8c7f1d95..5b13d88544d98 100644 --- a/clang/test/ClangScanDeps/symlink.cpp +++ b/clang/test/ClangScanDeps/symlink.cpp @@ -1,4 +1,5 @@ -// REQUIRES: shell +// REQUIRES: symlinks + // RUN: rm -rf %t.dir // RUN: rm -rf %t.cdb // RUN: mkdir -p %t.dir diff --git a/clang/test/CodeCompletion/included-symlinks.cpp b/clang/test/CodeCompletion/included-symlinks.cpp index 7ac5e20e7fdef..c3e7c0ab0c5a3 100644 --- 
a/clang/test/CodeCompletion/included-symlinks.cpp +++ b/clang/test/CodeCompletion/included-symlinks.cpp @@ -1,4 +1,4 @@ -// REQUIRES: shell +// REQUIRES: symlinks // RUN: rm -rf %t && mkdir -p %t/real/myproj && mkdir -p %t/links // RUN: touch %t/real/foo.h && ln -s %t/real/foo.h %t/links/foo.h // RUN: touch %t/real/foobar.h && ln -s %t/real/foobar.h %t/links/foobar.h diff --git a/clang/test/Driver/aarch64-toolchain-extra.c b/clang/test/Driver/aarch64-toolchain-extra.c index 4945a622969c6..ccd2876ea84cb 100644 --- a/clang/test/Driver/aarch64-toolchain-extra.c +++ b/clang/test/Driver/aarch64-toolchain-extra.c @@ -2,8 +2,7 @@ // The tests here are similar to those in aarch64-toolchain.c, however // these tests need to create symlinks to test directory trees in order to -// set up the environment and therefore shell support is required. -// REQUIRES: shell +// set up the environment and therefore POSIX is required. // UNSUPPORTED: system-windows // If there is no GCC install detected then the driver searches for executables diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c index 12e298a8636b1..972105143debf 100644 --- a/clang/test/Driver/amdgpu-hip-system-arch.c +++ b/clang/test/Driver/amdgpu-hip-system-arch.c @@ -1,4 +1,5 @@ -// REQUIRES: shell +// Needs chmod +// UNSUPPORTED: system-windows // XFAIL: target={{.*}}-zos{{.*}} // RUN: mkdir -p %t diff --git a/clang/test/Driver/amdgpu-openmp-system-arch-fail.c b/clang/test/Driver/amdgpu-openmp-system-arch-fail.c index eb037183b4c3c..8973d66afbae4 100644 --- a/clang/test/Driver/amdgpu-openmp-system-arch-fail.c +++ b/clang/test/Driver/amdgpu-openmp-system-arch-fail.c @@ -1,4 +1,5 @@ -// REQUIRES: shell +// Due to chmod +// UNSUPPORTED: system-windows // RUN: mkdir -p %t // RUN: rm -f %t/amdgpu_arch_fail %t/amdgpu_arch_different diff --git a/clang/test/Driver/arm-toolchain-extra.c b/clang/test/Driver/arm-toolchain-extra.c index 43cca6112176c..03b30d6540898 100644 --- 
a/clang/test/Driver/arm-toolchain-extra.c +++ b/clang/test/Driver/arm-toolchain-extra.c @@ -3,7 +3,6 @@ // The tests here are similar to those in arm-toolchain.c, however // these tests need to create symlinks to test directory trees in order to // set up the environment and therefore shell support is required. -// REQUIRES: shell // UNSUPPORTED: system-windows // If there is no GCC install detected then the driver searches for executables diff --git a/clang/test/Driver/baremetal-multilib-layered.yaml b/clang/test/Driver/baremetal-multilib-layered.yaml index 6671d9d672f58..61d69140fb7fd 100644 --- a/clang/test/Driver/baremetal-multilib-layered.yaml +++ b/clang/test/Driver/baremetal-multilib-layered.yaml @@ -1,4 +1,3 @@ -# REQUIRES: shell # UNSUPPORTED: system-windows # This test demonstrates "layered" multilib in which more than one diff --git a/clang/test/Driver/baremetal-multilib.yaml b/clang/test/Driver/baremetal-multilib.yaml index 1a80c3b4ccfc8..c2b37fad97dea 100644 --- a/clang/test/Driver/baremetal-multilib.yaml +++ b/clang/test/Driver/baremetal-multilib.yaml @@ -1,4 +1,3 @@ -# REQUIRES: shell # UNSUPPORTED: system-windows # RUN: %clang --multi-lib-config=%s -no-canonical-prefixes -x c++ %s -### -o %t.out 2>&1 \ diff --git a/clang/test/Driver/baremetal-sysroot.cpp b/clang/test/Driver/baremetal-sysroot.cpp index 4c062e28e6bc3..717466c185763 100644 --- a/clang/test/Driver/baremetal-sysroot.cpp +++ b/clang/test/Driver/baremetal-sysroot.cpp @@ -1,4 +1,3 @@ -// REQUIRES: shell // UNSUPPORTED: system-windows // Test that when a --sysroot is not provided, driver picks the default diff --git a/clang/test/Driver/darwin-ld-demangle-lld.c b/clang/test/Driver/darwin-ld-demangle-lld.c index 12fd8502ce7a0..4edeb1058b933 100644 --- a/clang/test/Driver/darwin-ld-demangle-lld.c +++ b/clang/test/Driver/darwin-ld-demangle-lld.c @@ -1,5 +1,4 @@ // With -fuse-ld=lld, -demangle is always passed to the linker on Darwin. 
-// REQUIRES: shell // RUN: %clang --target=x86_64-apple-darwin -### -fuse-ld=lld \ // RUN: -B%S/Inputs/lld -mlinker-version=0 %s 2>&1 \ diff --git a/clang/test/Driver/darwin-ld-lto-lld.c b/clang/test/Driver/darwin-ld-lto-lld.c index 2f44cad534b1f..3e110463fc286 100644 --- a/clang/test/Driver/darwin-ld-lto-lld.c +++ b/clang/test/Driver/darwin-ld-lto-lld.c @@ -1,5 +1,3 @@ -// REQUIRES: shell - // Check that lld gets "-lto_library". // (Separate test file since darwin-ld-lto requires system-darwin but this // test doesn't require that.) diff --git a/clang/test/Driver/mingw-sysroot.cpp b/clang/test/Driver/mingw-sysroot.cpp index 0ba2f336fd2e0..8e46d23c1782d 100644 --- a/clang/test/Driver/mingw-sysroot.cpp +++ b/clang/test/Driver/mingw-sysroot.cpp @@ -1,4 +1,3 @@ -// REQUIRES: shell // UNSUPPORTED: system-windows // RUN: rm -rf %t.dir/testroot-gcc diff --git a/clang/test/Driver/no-canonical-prefixes.c b/clang/test/Driver/no-canonical-prefixes.c index 669e56639284a..f2b76db8de7e1 100644 --- a/clang/test/Driver/no-canonical-prefixes.c +++ b/clang/test/Driver/no-canonical-prefixes.c @@ -1,5 +1,5 @@ // Due to ln -sf: -// REQUIRES: shell +// UNSUPPORTED: system-windows // RUN: mkdir -p %t.real // RUN: cd %t.real // RUN: ln -sf %clang test-clang diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c index 2d4eca8c43bc3..675d15bf22cc0 100644 --- a/clang/test/Driver/nvptx-cuda-system-arch.c +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -1,4 +1,4 @@ -// REQUIRES: shell +// UNSUPPORTED: system-windows // XFAIL: target={{.*}}-zos{{.*}} // RUN: mkdir -p %t diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c index 167b07a23f512..1670fd30f4b59 100644 --- a/clang/test/Driver/openmp-system-arch.c +++ b/clang/test/Driver/openmp-system-arch.c @@ -1,4 +1,5 @@ -// REQUIRES: shell +// Needs chmod +// UNSUPPORTED: system-windows // XFAIL: target={{.*}}-zos{{.*}} // RUN: mkdir -p %t diff --git 
a/clang/test/Driver/parse-progname.c b/clang/test/Driver/parse-progname.c index 34040b81dc733..104ea971684aa 100644 --- a/clang/test/Driver/parse-progname.c +++ b/clang/test/Driver/parse-progname.c @@ -1,4 +1,5 @@ -// REQUIRES: shell, arm-registered-target +// REQUIRES: arm-registered-target +// UNSUPPORTED: system-windows // UNSUPPORTED: llvm-driver // RUN: mkdir -p %t diff --git a/clang/test/Driver/riscv32-toolchain-extra.c b/clang/test/Driver/riscv32-toolchain-extra.c index 420f7b5203609..d228a58d23d4e 100644 --- a/clang/test/Driver/riscv32-toolchain-extra.c +++ b/clang/test/Driver/riscv32-toolchain-extra.c @@ -2,8 +2,7 @@ // The tests here are similar to those in riscv32-toolchain.c, however // these tests need to create symlinks to test directory trees in order to -// set up the environment and therefore shell support is required. -// REQUIRES: shell +// set up the environment and therefore POSIX support is required. // UNSUPPORTED: system-windows // If there is no GCC install detected then the driver searches for executables diff --git a/clang/test/Driver/riscv64-toolchain-extra.c b/clang/test/Driver/riscv64-toolchain-extra.c index 87bcdeb651a1e..e1d3a10dd788d 100644 --- a/clang/test/Driver/riscv64-toolchain-extra.c +++ b/clang/test/Driver/riscv64-toolchain-extra.c @@ -3,7 +3,6 @@ // The tests here are similar to those in riscv64-toolchain.c, however // these tests need to create symlinks to test directory trees in order to // set up the environment and therefore shell support is required. 
-// REQUIRES: shell // UNSUPPORTED: system-windows // If there is no GCC install detected then the driver searches for executables diff --git a/clang/test/Driver/sigpipe-handling.c b/clang/test/Driver/sigpipe-handling.c index 852f0bfaf7981..7283800ba3c2f 100644 --- a/clang/test/Driver/sigpipe-handling.c +++ b/clang/test/Driver/sigpipe-handling.c @@ -1,4 +1,4 @@ -// REQUIRES: shell +// UNSUPPORTED: system-windows // RUN: %clang -E -fno-integrated-cc1 %s | head | FileCheck %s // Test that the parent clang driver process doesn't crash when the child cc1 diff --git a/clang/test/Driver/target-override.c b/clang/test/Driver/target-override.c index 2c605ac9a03da..5bd88e10e8576 100644 --- a/clang/test/Driver/target-override.c +++ b/clang/test/Driver/target-override.c @@ -1,5 +1,4 @@ -// REQUIRES: shell -// REQUIRES: x86-registered-target +// REQUIRES: x86-registered-target, symlinks // RUN: rm -rf %t && mkdir %t // RUN: ln -s %clang %t/i386-clang diff --git a/clang/test/Driver/verbose-output-quoting.c b/clang/test/Driver/verbose-output-quoting.c index b2781b754ecd7..ebfe3d08fb234 100644 --- a/clang/test/Driver/verbose-output-quoting.c +++ b/clang/test/Driver/verbose-output-quoting.c @@ -1,4 +1,4 @@ -// REQUIRES: shell +// UNSUPPORTED: system-windows // RUN: %clang --verbose -DSPACE="a b" -### %s 2>&1 | FileCheck -check-prefix=SPACE -strict-whitespace %s // RUN: %clang --verbose -DQUOTES=\"\" -### %s 2>&1 | FileCheck -check-prefix=QUOTES -strict-whitespace %s // RUN: %clang --verbose -DBACKSLASH=\\ -### %s 2>&1 | FileCheck -check-prefix=BACKSLASH -strict-whitespace %s diff --git a/clang/test/Frontend/dependency-gen-symlink.c b/clang/test/Frontend/dependency-gen-symlink.c index 2fa339ad2abf2..b88fb7f1a6b2f 100644 --- a/clang/test/Frontend/dependency-gen-symlink.c +++ b/clang/test/Frontend/dependency-gen-symlink.c @@ -1,4 +1,4 @@ -// REQUIRES: shell +// REQUIRES: symlinks // Basic test // RUN: rm -rf %t.dir diff --git a/clang/test/Index/preamble-reparse-changed-module.m 
b/clang/test/Index/preamble-reparse-changed-module.m index 349ed0db27d01..88e837afe6b0a 100644 --- a/clang/test/Index/preamble-reparse-changed-module.m +++ b/clang/test/Index/preamble-reparse-changed-module.m @@ -1,4 +1,4 @@ -// REQUIRES: shell +// UNSUPPORTED: system-windows // RUN: rm -rf %t // RUN: mkdir -p %t/mod diff --git a/clang/test/InterfaceStubs/driver-test.c b/clang/test/InterfaceStubs/driver-test.c index 741cdab3e9d31..9080890f85cde 100644 --- a/clang/test/InterfaceStubs/driver-test.c +++ b/clang/test/InterfaceStubs/driver-test.c @@ -1,5 +1,4 @@ // REQUIRES: x86-registered-target -// REQUIRES: shell // NOTE: -fno-integrated-cc1 has been added to work around an ASAN failure // caused by in-process cc1 invocation. Clang InterfaceStubs is not the diff --git a/clang/test/InterfaceStubs/driver-test2.c b/clang/test/InterfaceStubs/driver-test2.c index 905b27922264c..6c0eb00a62f37 100644 --- a/clang/test/InterfaceStubs/driver-test2.c +++ b/clang/test/InterfaceStubs/driver-test2.c @@ -1,5 +1,4 @@ // REQUIRES: x86-registered-target -// REQUIRES: shell // NOTE: -fno-integrated-cc1 has been added to work around an ASAN failure // caused by in-process cc1 invocation. 
Clang InterfaceStubs is not the diff --git a/clang/test/InterfaceStubs/driver-test3.c b/clang/test/InterfaceStubs/driver-test3.c index 407fb5c20cb41..0973f1ffc18f4 100644 --- a/clang/test/InterfaceStubs/driver-test3.c +++ b/clang/test/InterfaceStubs/driver-test3.c @@ -1,5 +1,4 @@ // REQUIRES: x86-registered-target -// REQUIRES: shell // RUN: mkdir -p %t; cd %t // RUN: %clang -target x86_64-unknown-linux-gnu -c -emit-interface-stubs %s -o %t/driver-test3.o diff --git a/clang/test/Modules/crash-vfs-headermaps.m b/clang/test/Modules/crash-vfs-headermaps.m index 0afa0dee63792..26ff3f26450b5 100644 --- a/clang/test/Modules/crash-vfs-headermaps.m +++ b/clang/test/Modules/crash-vfs-headermaps.m @@ -1,4 +1,4 @@ -// REQUIRES: crash-recovery, shell, system-darwin +// REQUIRES: crash-recovery, system-darwin // RUN: rm -rf %t // RUN: mkdir -p %t/m %t/i/Foo.framework/Headers diff --git a/clang/test/Modules/crash-vfs-include-pch.m b/clang/test/Modules/crash-vfs-include-pch.m index 9ca10020094b0..2610b06974578 100644 --- a/clang/test/Modules/crash-vfs-include-pch.m +++ b/clang/test/Modules/crash-vfs-include-pch.m @@ -1,4 +1,4 @@ -// REQUIRES: crash-recovery, shell, system-darwin +// REQUIRES: crash-recovery, system-darwin // // RUN: rm -rf %t // RUN: mkdir -p %t/m %t/out diff --git a/clang/test/Modules/crash-vfs-path-emptydir-entries.m b/clang/test/Modules/crash-vfs-path-emptydir-entries.m index a7ee1fe176fb0..9564f11cdbcff 100644 --- a/clang/test/Modules/crash-vfs-path-emptydir-entries.m +++ b/clang/test/Modules/crash-vfs-path-emptydir-entries.m @@ -1,4 +1,5 @@ -// REQUIRES: crash-recovery, shell +// UNSUPPORTED: system-windows +// REQUIRES: crash-recovery // FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? 
// XFAIL: target={{.*-windows-gnu}} diff --git a/clang/test/Modules/crash-vfs-path-symlink-topheader.m b/clang/test/Modules/crash-vfs-path-symlink-topheader.m index 5c2d502b209da..bab754fcb749b 100644 --- a/clang/test/Modules/crash-vfs-path-symlink-topheader.m +++ b/clang/test/Modules/crash-vfs-path-symlink-topheader.m @@ -1,4 +1,4 @@ -// REQUIRES: crash-recovery, shell +// REQUIRES: crash-recovery, symlinks // FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? // XFAIL: target={{.*-windows-gnu}} diff --git a/clang/test/Modules/embed-files-compressed.cpp b/clang/test/Modules/embed-files-compressed.cpp index aca9983ff160b..5318aeb10a81e 100644 --- a/clang/test/Modules/embed-files-compressed.cpp +++ b/clang/test/Modules/embed-files-compressed.cpp @@ -1,5 +1,4 @@ // REQUIRES: zlib || zstd -// REQUIRES: shell // // RUN: rm -rf %t // RUN: mkdir %t diff --git a/clang/test/Modules/embed-files.cpp b/clang/test/Modules/embed-files.cpp index 8e5a16e544008..946daaee9991e 100644 --- a/clang/test/Modules/embed-files.cpp +++ b/clang/test/Modules/embed-files.cpp @@ -13,7 +13,7 @@ // FIXME: This test is flaky on Windows because attempting to delete a file // after writing it just doesn't seem to work well, at least not in the lit // shell. 
-// REQUIRES: shell +// UNSUPPORTED: system-windows // RUN: rm %t/x.h // RUN: %clang_cc1 -fmodules -I%t -fmodule-map-file=%t/modulemap -fmodule-file=%t/a.pcm -fmodule-file=%t/b.pcm %s -verify #include "a.h" diff --git a/clang/test/Modules/exponential-paths.cpp b/clang/test/Modules/exponential-paths.cpp index b5641933f8d08..05b586bf5cd18 100644 --- a/clang/test/Modules/exponential-paths.cpp +++ b/clang/test/Modules/exponential-paths.cpp @@ -1,5 +1,3 @@ -// REQUIRES: shell -// // RUN: rm -rf %t // RUN: mkdir %t // diff --git a/clang/test/Modules/framework-name.m b/clang/test/Modules/framework-name.m index 52e68f12de5ae..3e0c45971f969 100644 --- a/clang/test/Modules/framework-name.m +++ b/clang/test/Modules/framework-name.m @@ -1,4 +1,5 @@ -// REQUIRES: shell +// REQUIRES: symlinks + // RUN: rm -rf %t.mcp %t // RUN: mkdir -p %t // RUN: ln -s %S/Inputs/NameInDir2.framework %t/NameInImport.framework diff --git a/clang/test/Modules/implicit-private-without-public.m b/clang/test/Modules/implicit-private-without-public.m index e4920bcc7ec6f..ee0674e64a252 100644 --- a/clang/test/Modules/implicit-private-without-public.m +++ b/clang/test/Modules/implicit-private-without-public.m @@ -1,4 +1,3 @@ -// REQUIRES: shell // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t \ // RUN: -F%S/Inputs/implicit-private-without-public \ diff --git a/clang/test/Modules/inferred-framework-case.m b/clang/test/Modules/inferred-framework-case.m index 2ed443f2b5a18..64828b5cdd868 100644 --- a/clang/test/Modules/inferred-framework-case.m +++ b/clang/test/Modules/inferred-framework-case.m @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify -DA // FIXME: PR20299 - getCanonicalName() is not implemented on Windows. 
-// REQUIRES: shell +// UNSUPPORTED: system-windows @import MOdule; // expected-error{{module 'MOdule' not found}} @import Module; diff --git a/clang/test/Modules/module-file-modified.c b/clang/test/Modules/module-file-modified.c index 57160f34a46cf..1a02b3fa511b3 100644 --- a/clang/test/Modules/module-file-modified.c +++ b/clang/test/Modules/module-file-modified.c @@ -9,4 +9,3 @@ int foo = 0; // redefinition of 'foo' // CHECK: fatal error: file {{.*}} has been modified since the module file {{.*}} was built // CHECK: note: please rebuild precompiled file -// REQUIRES: shell diff --git a/clang/test/Modules/module-symlink.m b/clang/test/Modules/module-symlink.m index efdaf3db0dfef..9de1cf9b5fb5d 100644 --- a/clang/test/Modules/module-symlink.m +++ b/clang/test/Modules/module-symlink.m @@ -1,4 +1,4 @@ -// REQUIRES: shell +// REQUIRES: symlinks // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules-cache-path=%t/modules -fmodules -fimplicit-module-maps -I %S/Inputs -emit-pch -o %t.pch %s -verify diff --git a/clang/test/Modules/modulemap-collision.m b/clang/test/Modules/modulemap-collision.m index 5ada45da3dae1..2778386dfd331 100644 --- a/clang/test/Modules/modulemap-collision.m +++ b/clang/test/Modules/modulemap-collision.m @@ -1,4 +1,5 @@ -// REQUIRES: shell +// Most likely platform specific sed differences +// UNSUPPORTED: system-windows // RUN: rm -rf %t // RUN: mkdir -p %t/sources %t/build diff --git a/clang/test/Modules/validate-file-content.m b/clang/test/Modules/validate-file-content.m index 9977aa4665f04..cff89884552b7 100644 --- a/clang/test/Modules/validate-file-content.m +++ b/clang/test/Modules/validate-file-content.m @@ -1,5 +1,3 @@ -// REQUIRES: shell -// // Check driver works // RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s // CHECK-CC1: -fvalidate-ast-input-files-content diff --git a/clang/test/PCH/validate-file-content.m b/clang/test/PCH/validate-file-content.m index 
b98979341b76a..8863b7abea3af 100644 --- a/clang/test/PCH/validate-file-content.m +++ b/clang/test/PCH/validate-file-content.m @@ -1,5 +1,3 @@ -// REQUIRES: shell -// // Check driver works // RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s // CHECK-CC1: -fvalidate-ast-input-files-content diff --git a/clang/test/Preprocessor/embed_zos.c b/clang/test/Preprocessor/embed_zos.c index 564a65f42afcd..12f9bf439ee8b 100644 --- a/clang/test/Preprocessor/embed_zos.c +++ b/clang/test/Preprocessor/embed_zos.c @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%t -verify // expected-no-diagnostics -// REQUIRES: shell, system-zos +// REQUIRES: system-zos const char data[] = { #embed diff --git a/clang/test/Preprocessor/nonportable-include-with-hmap.c b/clang/test/Preprocessor/nonportable-include-with-hmap.c index 07907dfb40d5b..f7e1abf69ad1e 100644 --- a/clang/test/Preprocessor/nonportable-include-with-hmap.c +++ b/clang/test/Preprocessor/nonportable-include-with-hmap.c @@ -1,4 +1,5 @@ -// REQUIRES: shell +// Most likely platform specific sed differences +// UNSUPPORTED: system-windows // REQUIRES: case-insensitive-filesystem // RUN: rm -f %t.hmap diff --git a/clang/test/Profile/cxx-hash-v2.cpp b/clang/test/Profile/cxx-hash-v2.cpp index 995fe008f5236..cb633d53f6f30 100644 --- a/clang/test/Profile/cxx-hash-v2.cpp +++ b/clang/test/Profile/cxx-hash-v2.cpp @@ -1,5 +1,3 @@ -// REQUIRES: shell - // Check that all of the hashes in this file are unique (i.e, that none of the // profiles for these functions are mutually interchangeable). 
// diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug-unclaimed/warn-unsafe-buffer-usage-debug-unclaimed.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug-unclaimed/warn-unsafe-buffer-usage-debug-unclaimed.cpp index ab3d925753d47..64dede2568df1 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug-unclaimed/warn-unsafe-buffer-usage-debug-unclaimed.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug-unclaimed/warn-unsafe-buffer-usage-debug-unclaimed.cpp @@ -13,7 +13,6 @@ // This debugging facility is only available in debug builds. // // REQUIRES: asserts -// REQUIRES: shell void test_unclaimed_use(int *p) { // expected-warning{{'p' is an unsafe pointer used for buffer access}} p++; // expected-note{{used in pointer arithmetic here}} \ diff --git a/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp b/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp index 762c89e9e52aa..cc017a08cffda 100644 --- a/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp +++ b/clang/test/Tooling/auto-detect-from-source-parent-of-cwd.cpp @@ -1,3 +1,5 @@ +// REQUIRES: symlinks + // RUN: rm -rf %t // RUN: mkdir -p %t/abc/def/ijk/qwe // RUN: echo "[{\"directory\":\".\",\"command\":\"clang++ -c %t/abc/def/ijk/qwe/test.cpp\",\"file\":\"%t/abc/def/ijk/qwe/test.cpp\"}]" | sed -e 's/\\/\\\\/g' > %t/compile_commands.json @@ -9,5 +11,3 @@ // CHECK: a type specifier is required // CHECK: /abc/def/ijk/qwe/test.cpp invalid; - -// REQUIRES: shell diff --git a/clang/test/Tooling/clang-check-pwd.cpp b/clang/test/Tooling/clang-check-pwd.cpp index 2e8d4a3fe12b6..309cee54aadd9 100644 --- a/clang/test/Tooling/clang-check-pwd.cpp +++ b/clang/test/Tooling/clang-check-pwd.cpp @@ -1,3 +1,5 @@ +// REQUIRES: symlinks + // RUN: rm -rf %t // RUN: mkdir %t // RUN: echo "[{\"directory\":\".\",\"command\":\"clang++ -c %t/test.cpp\",\"file\":\"%t/test.cpp\"}]" | sed -e 's/\\/\\\\/g' > %t/compile_commands.json diff --git a/llvm/utils/lit/lit/llvm/config.py 
b/llvm/utils/lit/lit/llvm/config.py index 56aa5eb64fa36..3fbda5489a9de 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -198,6 +198,9 @@ def __init__(self, lit_config, config): if gmalloc_path_str is not None: self.with_environment("DYLD_INSERT_LIBRARIES", gmalloc_path_str) + if not platform.system() == "Windows": + features.add("symlinks") + def _find_git_windows_unix_tools(self, tools_needed): assert sys.platform == "win32" import winreg From 40e85fcaaa04515e511c2a2a8899e46bd6c300ba Mon Sep 17 00:00:00 2001 From: Dmitry Chigarev Date: Fri, 12 Sep 2025 03:25:51 +0200 Subject: [PATCH 061/734] [MLIR][XeGPU][VectorToXeGPU] Fix transfer_read/write cases with non-contiguous memrefs (#158126) This PR fixes a case where a source memref in `vector.transfer_read/write` is not contiguous, which violates the `memref.collapse_shape` semantic that is used in the lowering.
An example of a failing test ```mlir gpu.module @xevm_module { gpu.func @load_from_subview(%source: memref<4096x4096xf16>, %off1: index, %off2: index) -> vector<8xf16> { %c0 = arith.constant 0.0 : f16 %subview = memref.subview %source[%off1, %off2] [256, 256] [1, 1] : memref<4096x4096xf16> to memref<256x256xf16, strided<[4096, 1], offset: ?>> %0 = vector.transfer_read %subview[%off2, %off2], %c0 {in_bounds = [true]} : memref<256x256xf16, strided<[4096, 1], offset: ?>>, vector<8xf16> gpu.return %0 : vector<8xf16> } } ``` Fails with: ``` /home/user/llvm/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir:404:8: error: 'memref.collapse_shape' op invalid source layout map or collapsing non-contiguous dims %0 = vector.transfer_read %subview[%off2, %off2], %c0 ^ /home/user/llvm/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir:404:8: note: see current operation: %8 = "memref.collapse_shape"(%2) <{reassociation = [[0, 1]]}> : (memref<256x256xf16, strided<[4096, 1], offset: ?>>) -> memref<65536xf16> ```
A suggestion was to replace `memref.collapse_shape` with `memref.extract_aligned_pointer_as_index`, which is done in this PR. Since `extract_aligned_pointer` applied to a subview returns the original pointer without subview offsets, this PR also adds logic to use an offset obtained from `memref.extract_strided_metadata` in the `baseOffset` calculation in `computeOffsets`. --------- Signed-off-by: dchigarev --- .../VectorToXeGPU/VectorToXeGPU.cpp | 111 ++++++++---------- .../VectorToXeGPU/transfer-read-to-xegpu.mlir | 77 +++++++++--- .../transfer-write-to-xegpu.mlir | 72 ++++++++++-- 3 files changed, 174 insertions(+), 86 deletions(-) diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp index 819c2e5973ffd..852c322cc6467 100644 --- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp +++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp @@ -180,26 +180,31 @@ static void adjustStridesForPermutation(AffineMap permMap, strides = applyPermutation(strides, perms64); } -// Computes memory strides for vector transfer operations, handling both -// static and dynamic memrefs while applying permutation transformations -// for XeGPU lowering. -static SmallVector computeStrides(VectorTransferOpInterface xferOp, -  PatternRewriter &rewriter) { +// Computes memory strides and a memref offset for vector transfer operations, +// handling both static and dynamic memrefs while applying permutation +// transformations for XeGPU lowering. 
+static std::pair, Value> +computeMemrefMeta(VectorTransferOpInterface xferOp, PatternRewriter &rewriter) { SmallVector strides; Value baseMemref = xferOp.getBase(); AffineMap permMap = xferOp.getPermutationMap(); MemRefType memrefType = dyn_cast(baseMemref.getType()); Location loc = xferOp.getLoc(); + Value offsetVal = nullptr; if (memrefType.hasStaticShape()) { int64_t offset; SmallVector intStrides; if (failed(memrefType.getStridesAndOffset(intStrides, offset))) - return {}; + return {{}, offsetVal}; // Wrap static strides as MLIR values for (int64_t s : intStrides) strides.push_back(arith::ConstantIndexOp::create(rewriter, loc, s)); - } else { + if (!ShapedType::isDynamic(offset)) + offsetVal = arith::ConstantIndexOp::create(rewriter, loc, offset); + } + + if (strides.empty() || !offsetVal) { // For dynamic shape memref, use memref.extract_strided_metadata to get // stride values unsigned rank = memrefType.getRank(); @@ -220,11 +225,16 @@ static SmallVector computeStrides(VectorTransferOpInterface xferOp, auto meta = memref::ExtractStridedMetadataOp::create( rewriter, loc, resultTypes, baseMemref); - strides.append(meta.getStrides().begin(), meta.getStrides().end()); + + if (strides.empty()) + strides.append(meta.getStrides().begin(), meta.getStrides().end()); + + if (!offsetVal) + offsetVal = meta.getOffset(); } // Adjust strides according to the permutation map (e.g., for transpose) adjustStridesForPermutation(permMap, strides); - return strides; + return {strides, offsetVal}; } // This function compute the vectors of localOffsets for scattered load/stores. 
@@ -254,10 +264,10 @@ static SmallVector computeStrides(VectorTransferOpInterface xferOp, // %23 = arith.add %20, %21 // %local_offsets = arith.add %22, %23 // %orig_offset = %block_id_y * 4x2x6x32 // consider using affine map -// %offsets = orig_offset + local_offsets +// %offsets = memref_offset + orig_offset + local_offsets static Value computeOffsets(VectorTransferOpInterface xferOp, - PatternRewriter &rewriter, - ArrayRef strides) { + PatternRewriter &rewriter, ArrayRef strides, + Value baseOffset) { Location loc = xferOp.getLoc(); VectorType vectorType = xferOp.getVectorType(); SmallVector indices(xferOp.getIndices().begin(), @@ -315,51 +325,30 @@ static Value computeOffsets(VectorTransferOpInterface xferOp, arith::AddIOp::create(rewriter, loc, localOffsets, broadcasted[i]); // Compute base offset from transfer read indices - Value baseOffset = nullptr; - if (!indices.empty()) { - baseOffset = arith::ConstantIndexOp::create(rewriter, loc, 0); - for (size_t i = 0; i < indices.size(); ++i) { - Value strideVal = strides[i]; - Value offsetContrib = - arith::MulIOp::create(rewriter, loc, indices[i], strideVal); - baseOffset = - arith::AddIOp::create(rewriter, loc, baseOffset, offsetContrib); - } - // Broadcast base offset to match vector shape - Value bcastBase = vector::BroadcastOp::create( - rewriter, loc, fullIndexVectorType, baseOffset); - localOffsets = - arith::AddIOp::create(rewriter, loc, bcastBase, localOffsets); + for (size_t i = 0; i < indices.size(); ++i) { + Value strideVal = strides[i]; + Value offsetContrib = + arith::MulIOp::create(rewriter, loc, indices[i], strideVal); + baseOffset = + arith::AddIOp::create(rewriter, loc, baseOffset, offsetContrib); } + // Broadcast base offset to match vector shape + Value bcastBase = vector::BroadcastOp::create( + rewriter, loc, fullIndexVectorType, baseOffset); + localOffsets = arith::AddIOp::create(rewriter, loc, bcastBase, localOffsets); return localOffsets; } -// Collapse memref shape to 1D -static Value 
collapseMemrefTo1D(VectorTransferOpInterface xferOp, - PatternRewriter &rewriter) { +// Convert memref to i64 base pointer +static Value memrefToIndexPtr(VectorTransferOpInterface xferOp, + PatternRewriter &rewriter) { Location loc = xferOp.getLoc(); - - Value baseMemref = xferOp.getBase(); - MemRefType memrefType = dyn_cast(baseMemref.getType()); - Type elementType = memrefType.getElementType(); - - // Compute the total number of elements in the memref - MemRefType flatMemrefType; - if (memrefType.hasStaticShape()) { - auto totalElements = memrefType.getNumElements(); - flatMemrefType = MemRefType::get({totalElements}, elementType); - } else { - flatMemrefType = MemRefType::get({ShapedType::kDynamic}, elementType); - } - - SmallVector reassociation; - ReassociationIndices allDims = - llvm::to_vector(llvm::seq(0, memrefType.getRank())); - reassociation.push_back(allDims); - - auto collapseOp = memref::CollapseShapeOp::create( - rewriter, loc, flatMemrefType, baseMemref, reassociation); - return collapseOp; + auto indexPtr = memref::ExtractAlignedPointerAsIndexOp::create( + rewriter, loc, xferOp.getBase()) + .getResult(); + return arith::IndexCastOp::create(rewriter, loc, rewriter.getI64Type(), + indexPtr) + .getResult(); } static LogicalResult lowerToScatteredLoadOp(vector::TransferReadOp readOp, @@ -372,13 +361,14 @@ static LogicalResult lowerToScatteredLoadOp(vector::TransferReadOp readOp, if (!memrefType) return rewriter.notifyMatchFailure(readOp, "Expected memref source"); - SmallVector strides = computeStrides(readOp, rewriter); - if (strides.empty()) + auto meta = computeMemrefMeta(readOp, rewriter); + if (meta.first.empty()) return rewriter.notifyMatchFailure(readOp, "Failed to compute strides"); - Value localOffsets = computeOffsets(readOp, rewriter, strides); + Value localOffsets = + computeOffsets(readOp, rewriter, meta.first, meta.second); - Value flatMemref = collapseMemrefTo1D(readOp, rewriter); + Value flatMemref = memrefToIndexPtr(readOp, rewriter); 
Value mask = vector::ConstantMaskOp::create( rewriter, loc, VectorType::get(vectorShape, rewriter.getI1Type()), @@ -405,11 +395,14 @@ static LogicalResult lowerToScatteredStoreOp(vector::TransferWriteOp writeOp, if (!memrefType) return rewriter.notifyMatchFailure(writeOp, "Expected memref source"); - SmallVector strides = computeStrides(writeOp, rewriter); + auto meta = computeMemrefMeta(writeOp, rewriter); + if (meta.first.empty()) + return rewriter.notifyMatchFailure(writeOp, "Failed to compute strides"); - Value localOffsets = computeOffsets(writeOp, rewriter, strides); + Value localOffsets = + computeOffsets(writeOp, rewriter, meta.first, meta.second); - Value flatMemref = collapseMemrefTo1D(writeOp, rewriter); + Value flatMemref = memrefToIndexPtr(writeOp, rewriter); Value mask = vector::ConstantMaskOp::create( rewriter, loc, VectorType::get(vectorShape, rewriter.getI1Type()), diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir index b373bdab80567..c4ca79af1bd9a 100644 --- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir +++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir @@ -27,8 +27,9 @@ gpu.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vector // LOAD-GATHER-COUNT2: arith.addi {{.*}} : index // LOAD-GATHER: %[[SPLAT:.+]] = vector.broadcast {{.*}}: index to vector<8xindex> // LOAD-GATHER: %[[IDX:.+]] = arith.addi %[[SPLAT]], %[[STEP]] : vector<8xindex> -// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.collapse_shape %[[SRC]] {{\[}}[0, 1, 2]{{\]}} : memref<8x16x32xf32> into memref<4096xf32> -// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : memref<4096xf32>, vector<8xindex>, vector<8xi1> -> vector<8xf32> +// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref<8x16x32xf32> -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : 
index to i64 +// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : i64, vector<8xindex>, vector<8xi1> -> vector<8xf32> } @@ -62,8 +63,9 @@ gpu.func @load_2D_vector(%source: memref<8x16x32xf32>, // LOAD-GATHER-COUNT2: arith.addi {{.*}} : index // LOAD-GATHER: %[[SPLAT:.+]] = vector.broadcast {{.*}}: index to vector<8x16xindex> // LOAD-GATHER: %[[IDX:.+]] = arith.addi %[[SPLAT]], {{.*}}: vector<8x16xindex> -// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.collapse_shape %[[SRC]] {{\[}}[0, 1, 2]{{\]}} : memref<8x16x32xf32> into memref<4096xf32> -// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : memref<4096xf32>, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> +// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref<8x16x32xf32> -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : i64, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> } @@ -124,8 +126,9 @@ gpu.func @load_transposed(%source: memref<32x64xf32>, // LOAD-GATHER-COUNT2: arith.addi {{.*}} : index // LOAD-GATHER: %[[BCAST2:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex> // LOAD-GATHER: %[[IDX:.+]] = arith.addi %[[BCAST2]], {{.*}}: vector<8x16xindex> -// LOAD-GATHER: %[[COLLAPSE:.*]] = memref.collapse_shape %arg0 {{\[\[}}0, 1{{\]\]}} : memref<32x64xf32> into memref<2048xf32> -// LOAD-GATHER: %[[LOAD:.*]] = xegpu.load %[[COLLAPSE]][%[[IDX]]], %[[CST]] : memref<2048xf32>, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> +// LOAD-GATHER: %[[COLLAPSE:.*]] = memref.extract_aligned_pointer_as_index %arg0 : memref<32x64xf32> -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[LOAD:.*]] = xegpu.load %[[COLLAPSE_I]][%[[IDX]]], %[[CST]] : i64, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> 
} @@ -164,8 +167,9 @@ gpu.func @load_dynamic_source(%source: memref, // LOAD-GATHER-COUNT2: arith.addi {{.*}} : index // LOAD-GATHER: %[[BROADIDX:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex> // LOAD-GATHER: %[[FINALIDX:.+]] = arith.addi %[[BROADIDX]], {{.*}} : vector<8x16xindex> -// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2]{{\]}} : memref into memref -// LOAD-GATHER: %[[RES:.+]] = xegpu.load %[[COLLAPSE]][%[[FINALIDX]]], %[[CST]] : memref, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> +// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[ARG0]] : memref -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[RES:.+]] = xegpu.load %[[COLLAPSE_I]]{{\[}}%[[FINALIDX]]{{\]}}, %[[CST]] : i64, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> // LOAD-GATHER: gpu.return %[[RES]] : vector<8x16xf32> } @@ -195,8 +199,9 @@ gpu.func @load_dynamic_source2(%source: memref, // LOAD-GATHER-COUNT2: arith.addi {{.*}} : index // LOAD-GATHER-DAG: %[[BCASTIDX:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex> // LOAD-GATHER-DAG: %[[OFFSETS:.+]] = arith.addi %[[BCASTIDX]], {{.*}} : vector<8x16xindex> -// LOAD-GATHER-DAG: %[[COLLAPSE:.+]] = memref.collapse_shape %arg0 {{\[}}[0, 1, 2]{{\]}} : memref into memref -// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE]]{{\[}}%[[OFFSETS]]{{\]}}, %[[CST_0]] : memref, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> +// LOAD-GATHER-DAG: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %arg0 : memref -> index +// LOAD-GATHER-DAG: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE_I]]{{\[}}%[[OFFSETS]]{{\]}}, %[[CST_0]] : i64, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf32> } @@ -224,8 +229,9 @@ gpu.func @load_dynamic_source3(%source: memref, // LOAD-GATHER-COUNT3: arith.addi {{.*}} : 
vector<2x4x8x16xindex> // LOAD-GATHER: %[[SPLAT:.+]] = vector.broadcast {{.*}} : index to vector<2x4x8x16xindex> // LOAD-GATHER: %[[IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} : vector<2x4x8x16xindex> -// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.collapse_shape %[[SRC]] {{\[}}[0, 1, 2, 3, 4]{{\]}} : memref into memref -// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : memref, vector<2x4x8x16xindex>, vector<2x4x8x16xi1> -> vector<2x4x8x16xf32> +// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : i64, vector<2x4x8x16xindex>, vector<2x4x8x16xi1> -> vector<2x4x8x16xf32> // LOAD-GATHER: return %[[VEC]] } @@ -254,8 +260,9 @@ gpu.func @load_high_dim_vector(%source: memref<16x32x64xf32>, // LOAD-GATHER-COUNT2: arith.addi {{.*}} : vector<8x16x32xindex> // LOAD-GATHER: %[[BCASTOFF:.+]] = vector.broadcast {{.*}} : index to vector<8x16x32xindex> // LOAD-GATHER: %[[IDX:.+]] = arith.addi %[[BCASTOFF]], {{.*}} : vector<8x16x32xindex> -// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.collapse_shape %arg0 {{\[}}[0, 1, 2]{{\]}} : memref<16x32x64xf32> into memref<32768xf32> -// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE]][%[[IDX]]], %[[CST]] : memref<32768xf32>, vector<8x16x32xindex>, vector<8x16x32xi1> -> vector<8x16x32xf32> +// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %arg0 : memref<16x32x64xf32> -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE_I]][%[[IDX]]], %[[CST]] : i64, vector<8x16x32xindex>, vector<8x16x32xi1> -> vector<8x16x32xf32> } @@ -283,8 +290,9 @@ gpu.func @load_transpose_f16(%source: memref<32x64xf16>, // LOAD-GATHER-COUNT2: arith.addi {{.*}} : index // LOAD-GATHER: %[[BCAST2:.+]] = 
vector.broadcast {{.*}} : index to vector<8x16xindex> // LOAD-GATHER: %[[IDX:.+]] = arith.addi %[[BCAST2]], {{.*}}: vector<8x16xindex> -// LOAD-GATHER: %[[COLLAPSE:.*]] = memref.collapse_shape %arg0 {{\[\[}}0, 1{{\]\]}} : memref<32x64xf16> into memref<2048xf16> -// LOAD-GATHER: %[[LOAD:.*]] = xegpu.load %[[COLLAPSE]][%[[IDX]]], %[[CST]] : memref<2048xf16>, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf16> +// LOAD-GATHER: %[[COLLAPSE:.*]] = memref.extract_aligned_pointer_as_index %arg0 : memref<32x64xf16> -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[LOAD:.*]] = xegpu.load %[[COLLAPSE_I]][%[[IDX]]], %[[CST]] : i64, vector<8x16xindex>, vector<8x16xi1> -> vector<8x16xf16> } // ----- @@ -396,3 +404,40 @@ gpu.func @no_load_unsupported_map(%source: memref<16x32x64xf32>, // LOAD-GATHER: vector.transfer_read } +// ----- +gpu.module @xevm_module { +gpu.func @load_from_subview(%source: memref<4096x4096xf16>, %off1: index, %off2: index) -> vector<8xf16> { + %c0 = arith.constant 0.0 : f16 + %subview = memref.subview %source[%off1, %off2] [256, 256] [1, 1] : memref<4096x4096xf16> to memref<256x256xf16, strided<[4096, 1], offset: ?>> + %0 = vector.transfer_read %subview[%off2, %off2], %c0 + {in_bounds = [true]} : memref<256x256xf16, strided<[4096, 1], offset: ?>>, vector<8xf16> + gpu.return %0 : vector<8xf16> +} + +// LOAD-ND-LABEL: @load_from_subview( +// LOAD-ND-SAME: %[[SRC:.+]]: memref<4096x4096xf16>, +// LOAD-ND-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index +// LOAD-ND: %[[SUBVIEW:.+]] = memref.subview %[[SRC]][%[[OFF1]], %[[OFF2]]] [256, 256] [1, 1] : memref<4096x4096xf16> to memref<256x256xf16, strided<[4096, 1], offset: ?>> +// LOAD-ND: %[[DESC:.+]] = xegpu.create_nd_tdesc +// LOAD-ND-SAME: %[[SUBVIEW]][%[[OFF2]], %[[OFF2]]] +// LOAD-ND-SAME: memref<256x256xf16, strided<[4096, 1], offset: ?>> -> !xegpu.tensor_desc<8xf16, +// LOAD-ND-SAME: boundary_check = false +// LOAD-ND: %[[VEC:.+]] = 
xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf16> +// LOAD-ND: return %[[VEC]] + +// LOAD-GATHER-LABEL: @load_from_subview( +// LOAD-GATHER-SAME: %[[SRC:.+]]: memref<4096x4096xf16>, +// LOAD-GATHER-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index +// LOAD-GATHER: %[[CST:.+]] = arith.constant dense : vector<8xi1> +// LOAD-GATHER: %[[SUBVIEW:.+]] = memref.subview %[[SRC]][%[[OFF1]], %[[OFF2]]] [256, 256] [1, 1] : memref<4096x4096xf16> to memref<256x256xf16, strided<[4096, 1], offset: ?>> +// LOAD-GATHER: %[[BB:.+]], %[[OFFSET:.+]],{{.*}},{{.*}} = memref.extract_strided_metadata %[[SUBVIEW]] : memref<256x256xf16, strided<[4096, 1], offset: ?>> -> memref, index, index, index, index, index +// LOAD-GATHER: %[[STEP:.+]] = vector.step : vector<8xindex> +// LOAD-GATHER: arith.muli {{.*}} : index +// LOAD-GATHER: arith.addi %[[OFFSET]]{{.*}} : index +// LOAD-GATHER: arith.addi {{.*}} : index +// LOAD-GATHER: %[[SPLAT:.+]] = vector.broadcast {{.*}}: index to vector<8xindex> +// LOAD-GATHER: %[[IDX:.+]] = arith.addi %[[SPLAT]], %[[STEP]] : vector<8xindex> +// LOAD-GATHER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SUBVIEW]] : memref<256x256xf16, strided<[4096, 1], offset: ?>> -> index +// LOAD-GATHER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// LOAD-GATHER: %[[VEC:.+]] = xegpu.load %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : i64, vector<8xindex>, vector<8xi1> -> vector<8xf16> +} diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir index b3f761a545ee1..fcfc9414da4f6 100644 --- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir +++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir @@ -30,8 +30,9 @@ gpu.func @store_1D_vector(%vec: vector<8xf32>, // STORE-SCATTER-COUNT2: arith.addi {{.*}} : index // STORE-SCATTER-DAG: %[[BCAST:.+]] = vector.broadcast {{.*}} : index to vector<8xindex> // STORE-SCATTER-DAG: %[[IDX:.+]] 
= arith.addi %[[BCAST]], %{{.*}} : vector<8xindex> -// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.collapse_shape %[[SRC]] {{\[}}[0, 1, 2]{{\]}} : memref<8x16x32xf32> into memref<4096xf32> -// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8xf32>, memref<4096xf32>, vector<8xindex>, vector<8xi1> +// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref<8x16x32xf32> -> index +// STORE-SCATTER-DAG: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8xf32>, i64, vector<8xindex>, vector<8xi1> } // ----- @@ -64,8 +65,9 @@ gpu.func @store_2D_vector(%vec: vector<8x16xf32>, // STORE-SCATTER-COUNT2: vector.broadcast {{.*}} : vector<8x16xindex> // STORE-SCATTER-DAG: %[[BCAST2:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex> // STORE-SCATTER-DAG: %[[IDX:.+]] = arith.addi %[[BCAST2]], {{.*}} : vector<8x16xindex> -// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.collapse_shape %[[SRC]] {{\[}}[0, 1, 2]{{\]}} : memref<8x16x32xf32> into memref<4096xf32> -// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8x16xf32>, memref<4096xf32>, vector<8x16xindex>, vector<8x16xi1> +// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref<8x16x32xf32> -> index +// STORE-SCATTER-DAG: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8x16xf32>, i64, vector<8x16xindex>, vector<8x16xi1> } // ----- @@ -104,8 +106,9 @@ gpu.func @store_dynamic_source(%vec: vector<8x16xf32>, // STORE-SCATTER-COUNT2: vector.broadcast {{.*}} : vector<8x16xindex> // STORE-SCATTER-DAG: %[[BCAST2:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex> // STORE-SCATTER-DAG: %[[IDX:.+]] = arith.addi %[[BCAST2]], {{.*}} : 
vector<8x16xindex> -// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.collapse_shape %[[SRC]] {{\[}}[0, 1, 2]{{\]}} : memref into memref -// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8x16xf32>, memref, vector<8x16xindex>, vector<8x16xi1> +// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref -> index +// STORE-SCATTER-DAG: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8x16xf32>, i64, vector<8x16xindex>, vector<8x16xi1> } // ----- @@ -155,8 +158,9 @@ gpu.func @no_store_transposed(%vec: vector<8x16xf32>, // STORE-SCATTER-COUNT2: vector.broadcast {{.*}} : vector<8x16xindex> // STORE-SCATTER-DAG: %[[BCAST2:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex> // STORE-SCATTER-DAG: %[[IDX:.+]] = arith.addi %[[BCAST2]], {{.*}} : vector<8x16xindex> -// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.collapse_shape %[[SRC]] {{\[}}[0, 1]{{\]}} : memref<32x64xf32> into memref<2048xf32> -// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8x16xf32>, memref<2048xf32>, vector<8x16xindex>, vector<8x16xi1> +// STORE-SCATTER-DAG: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref<32x64xf32> -> index +// STORE-SCATTER-DAG: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8x16xf32>, i64, vector<8x16xindex>, vector<8x16xi1> } // ----- @@ -186,8 +190,9 @@ gpu.func @store_high_dim_vector(%vec: vector<8x16x32xf32>, // STORE-SCATTER-COUNT2: arith.addi {{.*}} : vector<8x16x32xindex> // STORE-SCATTER: %[[BCASTOFF:.+]] = vector.broadcast {{.*}} : index to vector<8x16x32xindex> // STORE-SCATTER: %[[IDX:.+]] = arith.addi %[[BCASTOFF]], {{.*}} : vector<8x16x32xindex> -// STORE-SCATTER: %[[COLLAPSE:.+]] = 
memref.collapse_shape %[[SRC]] {{\[}}[0, 1, 2]{{\]}} : memref<16x32x64xf32> into memref<32768xf32> -// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE]][%[[IDX]]], %[[CST]] : vector<8x16x32xf32>, memref<32768xf32>, vector<8x16x32xindex>, vector<8x16x32xi1> +// STORE-SCATTER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SRC]] : memref<16x32x64xf32> -> index +// STORE-SCATTER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE_I]][%[[IDX]]], %[[CST]] : vector<8x16x32xf32>, i64, vector<8x16x32xindex>, vector<8x16x32xi1> } // ----- @@ -275,4 +280,49 @@ gpu.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>, // STORE-SCATTER-LABEL: @no_store_out_of_bounds_1D_vector( // STORE-SCATTER: vector.transfer_write -} \ No newline at end of file +} + +// ----- +gpu.module @xevm_module { +gpu.func @store_to_subview(%vec: vector<8xf16>, + %source: memref<4096x4096xf16>, %off1: index, %off2: index) { + %subview = memref.subview %source[%off1, %off2] [256, 256] [1, 1] + : memref<4096x4096xf16> + to memref<256x256xf16, strided<[4096, 1], offset: ?>> + vector.transfer_write %vec, %subview[%off2, %off2] + {in_bounds = [true]} + : vector<8xf16>, memref<256x256xf16, strided<[4096, 1], offset: ?>> + gpu.return +} +// STORE-ND-LABEL: @store_to_subview( +// STORE-ND-SAME: %[[VEC:.+]]: vector<8xf16>, +// STORE-ND-SAME: %[[SRC:.+]]: memref<4096x4096xf16>, +// STORE-ND-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index +// STORE-ND: %[[SUBVIEW:.+]] = memref.subview %[[SRC]][%[[OFF1]], %[[OFF2]]] [256, 256] [1, 1] +// STORE-ND-SAME: : memref<4096x4096xf16> to memref<256x256xf16, strided<[4096, 1], offset: ?>> +// STORE-ND: %[[DESC:.+]] = xegpu.create_nd_tdesc +// STORE-ND-SAME: %[[SUBVIEW]][%[[OFF2]], %[[OFF2]]] +// STORE-ND-SAME: memref<256x256xf16, strided<[4096, 1], offset: ?>> -> !xegpu.tensor_desc<8xf16, +// STORE-ND-SAME: boundary_check = false +// STORE-ND: xegpu.store_nd %[[VEC]], %[[DESC]] : 
vector<8xf16> + +// STORE-SCATTER-LABEL: @store_to_subview( +// STORE-SCATTER-SAME: %[[VEC:.+]]: vector<8xf16>, +// STORE-SCATTER-SAME: %[[SRC:.+]]: memref<4096x4096xf16>, +// STORE-SCATTER-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index +// STORE-SCATTER: %[[CST:.+]] = arith.constant dense : vector<8xi1> +// STORE-SCATTER: %[[SUBVIEW:.+]] = memref.subview %[[SRC]][%[[OFF1]], %[[OFF2]]] [256, 256] [1, 1] +// STORE-SCATTER-SAME: : memref<4096x4096xf16> to memref<256x256xf16, strided<[4096, 1], offset: ?>> +// STORE-SCATTER: %[[BB:.+]], %[[OFFSET:.+]], {{.*}}, {{.*}} = memref.extract_strided_metadata %[[SUBVIEW]] +// STORE-SCATTER-SAME: : memref<256x256xf16, strided<[4096, 1], offset: ?>> -> memref, index, index, index, index, index +// STORE-SCATTER: %[[STEP:.+]] = vector.step : vector<8xindex> +// STORE-SCATTER: arith.muli {{.*}} : index +// STORE-SCATTER: arith.addi %[[OFFSET]]{{.*}} : index +// STORE-SCATTER: arith.addi {{.*}} : index +// STORE-SCATTER: %[[SPLAT:.+]] = vector.broadcast {{.*}} : index to vector<8xindex> +// STORE-SCATTER: %[[IDX:.+]] = arith.addi %[[SPLAT]], %[[STEP]] : vector<8xindex> +// STORE-SCATTER: %[[COLLAPSE:.+]] = memref.extract_aligned_pointer_as_index %[[SUBVIEW]] +// STORE-SCATTER-SAME: : memref<256x256xf16, strided<[4096, 1], offset: ?>> -> index +// STORE-SCATTER: %[[COLLAPSE_I:.+]] = arith.index_cast %[[COLLAPSE]] : index to i64 +// STORE-SCATTER: xegpu.store %[[VEC]], %[[COLLAPSE_I]]{{\[}}%[[IDX]]{{\]}}, %[[CST]] : vector<8xf16>, i64, vector<8xindex>, vector<8xi1> +} From 3ade8746ee66ad1394c9b5847a86fc326a942d06 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Fri, 12 Sep 2025 10:25:58 +0900 Subject: [PATCH 062/734] [clang] Look through parens around reinterpret_cast to emit a warning (#157033) Clang warns about UB when a `reinterpret_cast` is dereferenced as an incompatible type: ``` long l; *reinterpret_cast(&l) // UB ``` However, the code was too strict and did not handle extra parens around a `reinterpret_cast`, so the 
following case was not diagnosed: ``` long l; *(reinterpret_cast(&l)) // UB, but no warning ``` The patch now skips ParenExpr when looking for a CXXReinterpretCastExpr to enable a diagnostic for the second case. --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaExpr.cpp | 2 +- clang/test/SemaCXX/reinterpret-cast.cpp | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e1e497ccdbccd..060f3d982b850 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -283,6 +283,8 @@ Improvements to Clang's diagnostics pointers under ``-Wthread-safety-beta`` (still experimental), which reduces both false positives but also false negatives through more precise analysis. +- Clang now looks through parenthesis for ``-Wundefined-reinterpret-cast`` diagnostic. + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index aba00dc8ff9b6..bd62ac6234180 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -14784,7 +14784,7 @@ static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK, QualType OpTy = Op->getType(); QualType Result; - if (isa(Op)) { + if (isa(Op->IgnoreParens())) { QualType OpOrigType = Op->IgnoreParenCasts()->getType(); S.CheckCompatibleReinterpretCast(OpOrigType, OpTy, /*IsDereference*/true, Op->getSourceRange()); diff --git a/clang/test/SemaCXX/reinterpret-cast.cpp b/clang/test/SemaCXX/reinterpret-cast.cpp index bfb808773b900..10b2ed183e2a5 100644 --- a/clang/test/SemaCXX/reinterpret-cast.cpp +++ b/clang/test/SemaCXX/reinterpret-cast.cpp @@ -167,6 +167,10 @@ void dereference_reinterpret_cast() { (void)reinterpret_cast(d); // expected-warning {{reinterpret_cast from 'double' to 'float &' has undefined behavior}} (void)*reinterpret_cast(&d); // expected-warning {{dereference of type 'float *' that was reinterpret_cast from type 'double 
*' has undefined behavior}} + // Look through parens + (void)*(reinterpret_cast(&l)); // expected-warning {{dereference of type 'double *' that was reinterpret_cast from type 'long *' has undefined behavior}} + (void)*((reinterpret_cast((&l)))); // expected-warning {{dereference of type 'double *' that was reinterpret_cast from type 'long *' has undefined behavior}} + // TODO: add warning for tag types (void)reinterpret_cast(b); (void)*reinterpret_cast(&b); From 1329af9c28582bd6b9d1e3e605f6243eb0e01683 Mon Sep 17 00:00:00 2001 From: Weibo He Date: Fri, 12 Sep 2025 09:27:08 +0800 Subject: [PATCH 063/734] Revert "[LoopInfo] Pointer to stack object may not be loop invariant in a coroutine function (#149936)" (#157986) Since #156788 has resolved #149604, we can revert this workaround now. --- llvm/include/llvm/Analysis/LoopInfo.h | 5 +- .../include/llvm/Transforms/Utils/LoopUtils.h | 3 +- llvm/lib/Analysis/LoopInfo.cpp | 22 ++---- llvm/lib/Transforms/Scalar/LICM.cpp | 8 +- llvm/test/Transforms/LICM/licm-coroutine.ll | 78 ------------------- 5 files changed, 12 insertions(+), 104 deletions(-) delete mode 100644 llvm/test/Transforms/LICM/licm-coroutine.ll diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index f80744e70f7ad..a7a6a2753709c 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -59,12 +59,11 @@ class LLVM_ABI Loop : public LoopBase { }; /// Return true if the specified value is loop invariant. - bool isLoopInvariant(const Value *V, bool HasCoroSuspendInst = false) const; + bool isLoopInvariant(const Value *V) const; /// Return true if all the operands of the specified instruction are loop /// invariant. 
- bool hasLoopInvariantOperands(const Instruction *I, - bool HasCoroSuspendInst = false) const; + bool hasLoopInvariantOperands(const Instruction *I) const; /// If the given value is an instruction inside of the loop and it can be /// hoisted, do so to make it trivially loop-invariant. diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 5bef67eb021ca..c5dbb2bdd1dd8 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -185,8 +185,7 @@ LLVM_ABI bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, - bool, bool AllowSpeculation, - bool HasCoroSuspendInst = false); + bool, bool AllowSpeculation); /// Return true if the induction variable \p IV in a Loop whose latch is /// \p LatchBlock would become dead if the exit test \p Cond were removed. diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 6ba6073cce950..a8c3173bb1794 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -58,26 +58,14 @@ static cl::opt // Loop implementation // -bool Loop::isLoopInvariant(const Value *V, bool HasCoroSuspendInst) const { - if (const Instruction *I = dyn_cast(V)) { - // FIXME: this is semantically inconsistent. We're tracking a proper fix in - // issue #149604. - // If V is a pointer to stack object and L contains a coro.suspend function - // call, then V may not be loop invariant because the ramp function and - // resume function have different stack frames. 
- if (HasCoroSuspendInst && isa(I)) - return false; - else - return !contains(I); - } +bool Loop::isLoopInvariant(const Value *V) const { + if (const Instruction *I = dyn_cast(V)) + return !contains(I); return true; // All non-instructions are loop invariant } -bool Loop::hasLoopInvariantOperands(const Instruction *I, - bool HasCoroSuspendInst) const { - return all_of(I->operands(), [&](Value *V) { - return isLoopInvariant(V, HasCoroSuspendInst); - }); +bool Loop::hasLoopInvariantOperands(const Instruction *I) const { + return all_of(I->operands(), [&](Value *V) { return isLoopInvariant(V); }); } bool Loop::makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt, diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index e157cc9212769..40104e8fb4249 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -474,7 +474,7 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L, MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, - LicmAllowSpeculation, HasCoroSuspendInst); + LicmAllowSpeculation); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -892,7 +892,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE, bool LoopNestMode, - bool AllowSpeculation, bool HasCoroSuspendInst) { + bool AllowSpeculation) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -925,7 +925,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // TODO: It may be safe to hoist if we are hoisting to a conditional block // and we have accurately duplicated the control flow from the loop header // to that block. 
- if (CurLoop->hasLoopInvariantOperands(&I, HasCoroSuspendInst) && + if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) && isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo, ORE, Preheader->getTerminator(), AC, @@ -975,7 +975,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop); }; if ((IsInvariantStart(I) || isGuard(&I)) && - CurLoop->hasLoopInvariantOperands(&I, HasCoroSuspendInst) && + CurLoop->hasLoopInvariantOperands(&I) && MustExecuteWithoutWritesBefore(I)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); diff --git a/llvm/test/Transforms/LICM/licm-coroutine.ll b/llvm/test/Transforms/LICM/licm-coroutine.ll deleted file mode 100644 index a4765acfb93f8..0000000000000 --- a/llvm/test/Transforms/LICM/licm-coroutine.ll +++ /dev/null @@ -1,78 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=licm -S | FileCheck %s - -; %fca.0 and %fca.1 should not be hoisted out of the loop because the ramp -; function and resume function have different stack frames, so %pointer1 and -; %pointer2 have different values before and after @llvm.coro.suspend. 
- -define ptr @f(i32 %n) presplitcoroutine { -; CHECK-LABEL: define ptr @f( -; CHECK-SAME: i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[POINTER1:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: [[POINTER2:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) -; CHECK-NEXT: [[SIZE:%.*]] = call i32 @llvm.coro.size.i32() -; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i32 [[SIZE]]) -; CHECK-NEXT: [[HDL:%.*]] = call noalias ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]]) -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[INC:%.*]], %[[RESUME:.*]] ] -; CHECK-NEXT: [[INC]] = add nsw i32 [[N_VAL]], 1 -; CHECK-NEXT: call void @print(i32 [[N_VAL]]) -; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.coro.suspend(token none, i1 false) -; CHECK-NEXT: switch i8 [[TMP0]], label %[[SUSPEND_LOOPEXIT:.*]] [ -; CHECK-NEXT: i8 0, label %[[RESUME]] -; CHECK-NEXT: i8 1, label %[[CLEANUP:.*]] -; CHECK-NEXT: ] -; CHECK: [[RESUME]]: -; CHECK-NEXT: [[FCA_0:%.*]] = insertvalue [2 x ptr] poison, ptr [[POINTER1]], 0 -; CHECK-NEXT: [[FCA_1:%.*]] = insertvalue [2 x ptr] [[FCA_0]], ptr [[POINTER2]], 1 -; CHECK-NEXT: call void @foo([2 x ptr] [[FCA_1]]) -; CHECK-NEXT: br label %[[LOOP]] -; CHECK: [[CLEANUP]]: -; CHECK-NEXT: [[MEM:%.*]] = call ptr @llvm.coro.free(token [[ID]], ptr [[HDL]]) -; CHECK-NEXT: call void @free(ptr [[MEM]]) -; CHECK-NEXT: br label %[[SUSPEND:.*]] -; CHECK: [[SUSPEND_LOOPEXIT]]: -; CHECK-NEXT: br label %[[SUSPEND]] -; CHECK: [[SUSPEND]]: -; CHECK-NEXT: [[UNUSED:%.*]] = call i1 @llvm.coro.end(ptr [[HDL]], i1 false, token none) -; CHECK-NEXT: ret ptr [[HDL]] -; -entry: - %pointer1 = alloca ptr - %pointer2 = alloca ptr - %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) - %size = call i32 @llvm.coro.size.i32() - %alloc = call ptr @malloc(i32 %size) - %hdl = call noalias ptr @llvm.coro.begin(token %id, 
ptr %alloc) - br label %loop - -loop: - %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ] - %inc = add nsw i32 %n.val, 1 - call void @print(i32 %n.val) - %0 = call i8 @llvm.coro.suspend(token none, i1 false) - switch i8 %0, label %suspend [i8 0, label %resume - i8 1, label %cleanup] - -resume: - %fca.0 = insertvalue [2 x ptr] poison, ptr %pointer1, 0 - %fca.1 = insertvalue [2 x ptr] %fca.0, ptr %pointer2, 1 - call void @foo([2 x ptr] %fca.1) - br label %loop - -cleanup: - %mem = call ptr @llvm.coro.free(token %id, ptr %hdl) - call void @free(ptr %mem) - br label %suspend -suspend: - %unused = call i1 @llvm.coro.end(ptr %hdl, i1 false, token none) - ret ptr %hdl -} - -declare void @free(ptr) -declare ptr @malloc(i32) -declare void @print(i32) -declare void @foo([2 x ptr]) From 653ed062ea22e3d35b5b7ce6db391466821ca6e9 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 11 Sep 2025 18:38:54 -0700 Subject: [PATCH 064/734] [Clang] Invoke shell script with bash This test attempts to run a reproducer script generated by clang. This is intended to be run by a shell, so invoke it with an actual shell. This enables running the test with LLVM lit's internal shell. Reviewers: bcardosolopes, ilovepi, petrhosek Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/157608 --- clang/test/Modules/crash-vfs-umbrella-frameworks.m | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/test/Modules/crash-vfs-umbrella-frameworks.m b/clang/test/Modules/crash-vfs-umbrella-frameworks.m index 3861dfb36819b..9f79fb1c09b0d 100644 --- a/clang/test/Modules/crash-vfs-umbrella-frameworks.m +++ b/clang/test/Modules/crash-vfs-umbrella-frameworks.m @@ -1,7 +1,6 @@ -// REQUIRES: crash-recovery, shell - -// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? -// XFAIL: target={{.*-windows-gnu}} +// REQUIRES: crash-recovery +// File path separator differences. 
+// UNSUPPORTED: system-windows // RUN: rm -rf %t // RUN: mkdir -p %t/i %t/m %t @@ -44,4 +43,4 @@ // RUN: rm -rf i // RUN: rm -rf crash-vfs-umbrella-*.cache/modules/* // RUN: chmod 755 crash-vfs-*.sh -// RUN: ./crash-vfs-*.sh +// RUN: bash ./crash-vfs-*.sh From 95fc948c0a07953ae9d0973854336e197e36d349 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 11 Sep 2025 18:44:07 -0700 Subject: [PATCH 065/734] [lit] Remove Python 2 string support (#157979) There are some code paths within lit that still check what string types are supported with the aim of being compatible with Python 2 and 3. Given LLVM's minimum Python version is 3.8 and we do not have any upstream testing for Python 2, I think we can safely drop this. --- llvm/utils/lit/lit/TestRunner.py | 8 +------- llvm/utils/lit/lit/llvm/config.py | 6 +++--- llvm/utils/lit/lit/util.py | 10 +--------- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index cecbae61a3d73..a769919558a47 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -899,14 +899,8 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): # Replace uses of /dev/null with temporary files. 
if kAvoidDevNull: - # In Python 2.x, basestring is the base class for all string (including unicode) - # In Python 3.x, basestring no longer exist and str is always unicode - try: - str_type = basestring - except NameError: - str_type = str for i, arg in enumerate(args): - if isinstance(arg, str_type) and kDevNull in arg: + if isinstance(arg, str) and kDevNull in arg: f = tempfile.NamedTemporaryFile(delete=False) f.close() named_temp_files.append(f.name) diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 3fbda5489a9de..2f2df68ac0cff 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -233,7 +233,7 @@ def with_environment(self, variable, value, append_path=False): # For paths, we should be able to take a list of them and process # all of them. paths_to_add = value - if lit.util.is_string(paths_to_add): + if isinstance(paths_to_add, str): paths_to_add = [paths_to_add] def norm(x): @@ -262,7 +262,7 @@ def norm(x): self.config.environment[variable] = value def with_system_environment(self, variables, append_path=False): - if lit.util.is_string(variables): + if isinstance(variables, str): variables = [variables] for v in variables: value = os.environ.get(v) @@ -404,7 +404,7 @@ def add_tool_substitutions(self, tools, search_dirs=None): if not search_dirs: search_dirs = [self.config.llvm_tools_dir] - if lit.util.is_string(search_dirs): + if isinstance(search_dirs, str): search_dirs = [search_dirs] tools = [x if isinstance(x, ToolSubst) else ToolSubst(x) for x in tools] diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py index b03fd8bc22693..ce4c3c2df3436 100644 --- a/llvm/utils/lit/lit/util.py +++ b/llvm/utils/lit/lit/util.py @@ -13,14 +13,6 @@ import threading -def is_string(value): - try: - # Python 2 and Python 3 are different here. 
- return isinstance(value, basestring) - except NameError: - return isinstance(value, str) - - def pythonize_bool(value): if value is None: return False @@ -28,7 +20,7 @@ def pythonize_bool(value): return value if isinstance(value, numbers.Number): return value != 0 - if is_string(value): + if isinstance(value, str): if value.lower() in ("1", "true", "on", "yes"): return True if value.lower() in ("", "0", "false", "off", "no"): From 188901d6cab1d717eb8d114e76d9033c204a84e6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 10:50:03 +0900 Subject: [PATCH 066/734] AMDGPU: Fix returning wrong type for stack passed sub-dword arguments (#158002) Fixes assertion with -debug-only=isel on LowerFormalArguments result. That assert really shouldn't be under LLVM_DEBUG. Fixes #157997 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 66 ++-- llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 + ...k-passed-subdword-arg-crash-issue157997.ll | 283 ++++++++++++++++++ 3 files changed, 325 insertions(+), 27 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/stack-passed-subdword-arg-crash-issue157997.ll diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cb3e544449bbf..d347573ce565a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2359,6 +2359,37 @@ SDValue SITargetLowering::lowerKernargMemParameter( return DAG.getMergeValues({Val, Load.getValue(1)}, SL); } +/// Coerce an argument which was passed in a different ABI type to the original +/// expected value type. +SDValue SITargetLowering::convertABITypeToValueType(SelectionDAG &DAG, + SDValue Val, + CCValAssign &VA, + const SDLoc &SL) const { + EVT ValVT = VA.getValVT(); + + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. 
+ switch (VA.getLocInfo()) { + case CCValAssign::Full: + return Val; + case CCValAssign::BCvt: + return DAG.getNode(ISD::BITCAST, SL, ValVT, Val); + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, SL, VA.getLocVT(), Val, + DAG.getValueType(ValVT)); + return DAG.getNode(ISD::TRUNCATE, SL, ValVT, Val); + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, SL, VA.getLocVT(), Val, + DAG.getValueType(ValVT)); + return DAG.getNode(ISD::TRUNCATE, SL, ValVT, Val); + case CCValAssign::AExt: + return DAG.getNode(ISD::TRUNCATE, SL, ValVT, Val); + default: + llvm_unreachable("Unknown loc info!"); + } +} + SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, const SDLoc &SL, SDValue Chain, @@ -2379,7 +2410,6 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; @@ -2402,10 +2432,15 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, break; } - ArgValue = DAG.getExtLoad( + SDValue ArgValue = DAG.getExtLoad( ExtType, SL, VA.getLocVT(), Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), MemVT); - return ArgValue; + + SDValue ConvertedVal = convertABITypeToValueType(DAG, ArgValue, VA, SL); + if (ConvertedVal == ArgValue) + return ConvertedVal; + + return DAG.getMergeValues({ConvertedVal, ArgValue.getValue(1)}, SL); } SDValue SITargetLowering::getPreloadedValue( @@ -3396,30 +3431,7 @@ SDValue SITargetLowering::LowerFormalArguments( DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits))); } - // If this is an 8 or 16-bit value, it is really passed promoted - // to 32 bits. Insert an assert[sz]ext to capture this, then - // truncate to the right size. 
- switch (VA.getLocInfo()) { - case CCValAssign::Full: - break; - case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); - break; - case CCValAssign::SExt: - Val = DAG.getNode(ISD::AssertSext, DL, VT, Val, DAG.getValueType(ValVT)); - Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); - break; - case CCValAssign::ZExt: - Val = DAG.getNode(ISD::AssertZext, DL, VT, Val, DAG.getValueType(ValVT)); - Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); - break; - case CCValAssign::AExt: - Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); - break; - default: - llvm_unreachable("Unknown loc info!"); - } - + Val = convertABITypeToValueType(DAG, Val, VA, DL); InVals.push_back(Val); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 4886fcf9fd012..9c26cfa44a83e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -58,6 +58,9 @@ class SITargetLowering final : public AMDGPUTargetLowering { Align Alignment, ImplicitParameter Param) const; + SDValue convertABITypeToValueType(SelectionDAG &DAG, SDValue Val, + CCValAssign &VA, const SDLoc &SL) const; + SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, const SDLoc &SL, SDValue Chain, const ISD::InputArg &Arg) const; diff --git a/llvm/test/CodeGen/AMDGPU/stack-passed-subdword-arg-crash-issue157997.ll b/llvm/test/CodeGen/AMDGPU/stack-passed-subdword-arg-crash-issue157997.ll new file mode 100644 index 0000000000000..4791f603fc7ae --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/stack-passed-subdword-arg-crash-issue157997.ll @@ -0,0 +1,283 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s + +; Make sure that sub-dword arguments passed on the stack do not assert + +define i32 
@stack_arg_i1(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i1 %badarg) #0 { +; GFX9-LABEL: stack_arg_i1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u8 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i1 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i1_zeroext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i1 zeroext %badarg) #0 { +; GFX9-LABEL: stack_arg_i1_zeroext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i1_zeroext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u8 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i1 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i1_signext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i1 signext %badarg) #0 { +; GFX9-LABEL: stack_arg_i1_signext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i1_signext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u8 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = sext i1 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i8(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i8 %badarg) 
#0 { +; GFX9-LABEL: stack_arg_i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i8: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u8 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i8 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i8_zeroext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i8 zeroext %badarg) #0 { +; GFX9-LABEL: stack_arg_i8_zeroext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i8_zeroext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i8 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i8_signext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i8 signext %badarg) #0 { +; GFX9-LABEL: stack_arg_i8_signext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i8_signext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_i16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = sext i8 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i16(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i16 %badarg) #0 { +; GFX9-LABEL: stack_arg_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ushort v0, off, 
s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i16 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i16_zeroext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i16 zeroext %badarg) #0 { +; GFX9-LABEL: stack_arg_i16_zeroext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i16_zeroext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i16 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i16_signext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i16 signext %badarg) #0 { +; GFX9-LABEL: stack_arg_i16_signext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i16_signext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_i16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = sext i16 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i7(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i7 %badarg) #0 { +; GFX9-LABEL: stack_arg_i7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX11-LABEL: stack_arg_i7: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i7 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i7_zeroext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i7 zeroext %badarg) #0 { +; GFX9-LABEL: stack_arg_i7_zeroext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i7_zeroext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i7 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i7_signext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i7 signext %badarg) #0 { +; GFX9-LABEL: stack_arg_i7_signext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i7_signext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_i16 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = sext i7 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i17(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i17 %badarg) #0 { +; GFX9-LABEL: stack_arg_i17: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0x1ffff, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i17: +; GFX11: ; %bb.0: +; GFX11-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_b32 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v0, 0x1ffff, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i17 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i17_zeroext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i17 zeroext %badarg) #0 { +; GFX9-LABEL: stack_arg_i17_zeroext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i17_zeroext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_b32 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = zext i17 %badarg to i32 + ret i32 %ext +} + +define i32 @stack_arg_i17_signext(<8 x i32>, <8 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, i17 signext %badarg) #0 { +; GFX9-LABEL: stack_arg_i17_signext: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: stack_arg_i17_signext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_b32 v0, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %ext = sext i17 %badarg to i32 + ret i32 %ext +} + +attributes #0 = { nounwind } From af82c1a67b3a1dfc05b6149e68caa30103c15ce8 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Fri, 12 Sep 2025 10:05:24 +0800 Subject: [PATCH 067/734] [AMDGPU] Remove an unused variable (NFC) /llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp:3416:9: error: unused variable 'ValVT' [-Werror,-Wunused-variable] EVT ValVT = VA.getValVT(); ^ 1 error generated. 
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d347573ce565a..4927d2be67590 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3413,7 +3413,6 @@ SDValue SITargetLowering::LowerFormalArguments( RC = &AMDGPU::SGPR_32RegClass; else llvm_unreachable("Unexpected register class in LowerFormalArguments!"); - EVT ValVT = VA.getValVT(); Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); From 2508851798d727517f60d8b5a76c3001cabb7c59 Mon Sep 17 00:00:00 2001 From: benwu25 Date: Thu, 11 Sep 2025 19:40:17 -0700 Subject: [PATCH 068/734] [InstCombine] Fold `min(X+1, Y) - min(X, Y) --> zext X < Y` (#157782) This PR closes #157524. alive2: https://alive2.llvm.org/ce/z/xe_vb2 godbolt: https://alive2.llvm.org/ce/z/7A8PxK This fold is invalid for `@llvm.smin.i1` since `smin(-1, 0) == -1`. I also avoided i1 in general since this uses zext, but it seems like those checks for width might not be necessary, since other folds get to it first. The alive2 proof in #157524 used a select for the fold, but it seems like `select X < Y, 1, 0` should be canonicalized to `zext X < Y` if the bit width is correct. 
--- .../InstCombine/InstCombineAddSub.cpp | 18 +++ llvm/test/Transforms/InstCombine/min-zext.ll | 150 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/min-zext.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index f9155cc660317..71c53e37c7757 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2731,6 +2731,24 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return BinaryOperator::CreateSub(X, Not); } + // min(X+1, Y) - min(X, Y) --> zext X < Y + // Replacing a sub and at least one min with an icmp + // and a zext is a potential improvement. + if (match(Op0, m_c_SMin(m_NSWAddLike(m_Value(X), m_One()), m_Value(Y))) && + match(Op1, m_c_SMin(m_Specific(X), m_Specific(Y))) && + I.getType()->getScalarSizeInBits() != 1 && + (Op0->hasOneUse() || Op1->hasOneUse())) { + Value *Cond = Builder.CreateICmpSLT(X, Y); + return new ZExtInst(Cond, I.getType()); + } + if (match(Op0, m_c_UMin(m_NUWAddLike(m_Value(X), m_One()), m_Value(Y))) && + match(Op1, m_c_UMin(m_Specific(X), m_Specific(Y))) && + I.getType()->getScalarSizeInBits() != 1 && + (Op0->hasOneUse() || Op1->hasOneUse())) { + Value *Cond = Builder.CreateICmpULT(X, Y); + return new ZExtInst(Cond, I.getType()); + } + // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". 
Value *LHSOp, *RHSOp; diff --git a/llvm/test/Transforms/InstCombine/min-zext.ll b/llvm/test/Transforms/InstCombine/min-zext.ll new file mode 100644 index 0000000000000..f016d1a8de524 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/min-zext.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i32 @test_smin(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: define i32 @test_smin( +; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]] +; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[V3]] +; + %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1) + %v1 = add nsw i32 %arg0, 1 + %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1) + %v3 = sub i32 %v2, %v0 + ret i32 %v3 +} + +define i32 @test_umin(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: define i32 @test_umin( +; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ARG0]], [[ARG1]] +; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[V3]] +; + %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1) + %v1 = add nuw i32 %arg0, 1 + %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1) + %v3 = sub i32 %v2, %v0 + ret i32 %v3 +} + +define i1 @test_smin_i1(i1 %arg0, i1 %arg1) { +; CHECK-LABEL: define i1 @test_smin_i1( +; CHECK-SAME: i1 [[ARG0:%.*]], i1 [[ARG1:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = or i1 [[ARG0]], [[ARG1]] +; CHECK-NEXT: [[V3:%.*]] = xor i1 [[V0]], true +; CHECK-NEXT: ret i1 [[V3]] +; + %v0 = tail call i1 @llvm.smin.i1(i1 %arg0, i1 %arg1) + %v1 = add nsw i1 %arg0, 1 + %v2 = tail call i1 @llvm.smin.i1(i1 %v1, i1 %arg1) + %v3 = sub i1 %v2, %v0 + ret i1 %v3 +} + +declare void @use(i2) + +define i2 @test_smin_use_operands(i2 %arg0, i2 %arg1) { +; CHECK-LABEL: define i2 @test_smin_use_operands( +; CHECK-SAME: i2 [[ARG0:%.*]], i2 
[[ARG1:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = tail call i2 @llvm.smin.i2(i2 [[ARG0]], i2 [[ARG1]]) +; CHECK-NEXT: [[V1:%.*]] = add nsw i2 [[ARG0]], 1 +; CHECK-NEXT: [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]]) +; CHECK-NEXT: [[V3:%.*]] = sub i2 [[V2]], [[V0]] +; CHECK-NEXT: call void @use(i2 [[V2]]) +; CHECK-NEXT: call void @use(i2 [[V0]]) +; CHECK-NEXT: ret i2 [[V3]] +; + %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1) + %v1 = add nsw i2 %arg0, 1 + %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1) + %v3 = sub i2 %v2, %v0 + call void @use(i2 %v2) + call void @use(i2 %v0) + ret i2 %v3 +} + +define i2 @test_smin_use_operand(i2 %arg0, i2 %arg1) { +; CHECK-LABEL: define i2 @test_smin_use_operand( +; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = add nsw i2 [[ARG0]], 1 +; CHECK-NEXT: [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i2 [[ARG0]], [[ARG1]] +; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i2 +; CHECK-NEXT: call void @use(i2 [[V2]]) +; CHECK-NEXT: ret i2 [[V3]] +; + %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1) + %v1 = add nsw i2 %arg0, 1 + %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1) + %v3 = sub i2 %v2, %v0 + call void @use(i2 %v2) + ret i2 %v3 +} + +define i32 @test_smin_missing_nsw(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: define i32 @test_smin_missing_nsw( +; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[ARG1]]) +; CHECK-NEXT: [[V1:%.*]] = add i32 [[ARG0]], 1 +; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]]) +; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]] +; CHECK-NEXT: ret i32 [[V3]] +; + %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1) + %v1 = add i32 %arg0, 1 + %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1) + %v3 = sub i32 %v2, %v0 + ret i32 %v3 +} + +define i32 @test_umin_missing_nuw(i32 %arg0, i32 %arg1) 
{ +; CHECK-LABEL: define i32 @test_umin_missing_nuw( +; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.umin.i32(i32 [[ARG0]], i32 [[ARG1]]) +; CHECK-NEXT: [[V1:%.*]] = add i32 [[ARG0]], 1 +; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.umin.i32(i32 [[V1]], i32 [[ARG1]]) +; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]] +; CHECK-NEXT: ret i32 [[V3]] +; + %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1) + %v1 = add i32 %arg0, 1 + %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1) + %v3 = sub i32 %v2, %v0 + ret i32 %v3 +} + +@tmp = external global i32 + +define i32 @test_mismatched_operands(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: define i32 @test_mismatched_operands( +; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr @tmp, align 4 +; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[TMP]]) +; CHECK-NEXT: [[V1:%.*]] = add nsw i32 [[ARG0]], 1 +; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]]) +; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]] +; CHECK-NEXT: ret i32 [[V3]] +; + %tmp = load i32, ptr @tmp, align 4 + %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %tmp) + %v1 = add nsw i32 %arg0, 1 + %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1) + %v3 = sub i32 %v2, %v0 + ret i32 %v3 +} + +define i32 @test_disjoint_or(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: define i32 @test_disjoint_or( +; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]] +; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[V3]] +; + %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1) + %v1 = or disjoint i32 %arg0, 1 + %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1) + %v3 = sub i32 %v2, %v0 + ret i32 %v3 +} From 5621fa1c1ee210e24070b58ba9fa9e1c8fe5c132 Mon Sep 17 00:00:00 2001 From: lntue Date: Fri, 12 Sep 2025 00:03:17 -0400 Subject: 
[PATCH 069/734] [libc] Some more MSVC compatibility in src/__support. (#158108) --- libc/src/__support/CPP/bit.h | 2 +- .../CPP/type_traits/is_destructible.h | 2 +- libc/src/__support/FPUtil/FEnvImpl.h | 8 +++- libc/src/__support/FPUtil/FPBits.h | 10 ++-- libc/src/__support/endian_internal.h | 48 ++++++++++++++++--- libc/src/__support/macros/config.h | 1 + libc/src/__support/math_extras.h | 10 ++++ 7 files changed, 66 insertions(+), 15 deletions(-) diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 5a997ef555702..8dbb30047faec 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -39,7 +39,7 @@ bit_cast(const From &from) { #if __has_builtin(__builtin_bit_cast) return __builtin_bit_cast(To, from); #else - To to; + To to{}; char *dst = reinterpret_cast(&to); const char *src = reinterpret_cast(&from); #if __has_builtin(__builtin_memcpy_inline) diff --git a/libc/src/__support/CPP/type_traits/is_destructible.h b/libc/src/__support/CPP/type_traits/is_destructible.h index 830f22efafa52..7ada2235b4e73 100644 --- a/libc/src/__support/CPP/type_traits/is_destructible.h +++ b/libc/src/__support/CPP/type_traits/is_destructible.h @@ -22,7 +22,7 @@ namespace LIBC_NAMESPACE_DECL { namespace cpp { // is_destructible -#if __has_builtin(__is_destructible) +#if __has_builtin(__is_destructible) || defined(LIBC_COMPILER_IS_MSVC) template struct is_destructible : bool_constant<__is_destructible(T)> {}; #else diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h index 7bd56434e58fe..ef3f60a5b3d7f 100644 --- a/libc/src/__support/FPUtil/FEnvImpl.h +++ b/libc/src/__support/FPUtil/FEnvImpl.h @@ -18,6 +18,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" #if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP) #if defined(__APPLE__) @@ -29,9 
+30,12 @@ // The extra !defined(APPLE) condition is to cause x86_64 MacOS builds to use // the dummy implementations below. Once a proper x86_64 darwin fenv is set up, // the apple condition here should be removed. -#elif defined(LIBC_TARGET_ARCH_IS_X86) && !defined(__APPLE__) +// TODO: fully support fenv for MSVC. +#elif defined(LIBC_TARGET_ARCH_IS_X86) && !defined(__APPLE__) && \ + !defined(LIBC_COMPILER_IS_MSVC) #include "x86_64/FEnvImpl.h" -#elif defined(LIBC_TARGET_ARCH_IS_ARM) && defined(__ARM_FP) +#elif defined(LIBC_TARGET_ARCH_IS_ARM) && defined(__ARM_FP) && \ + !defined(LIBC_COMPILER_IS_MSVC) #include "arm/FEnvImpl.h" #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) && defined(__riscv_flen) #include "riscv/FEnvImpl.h" diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h index 2f695c1583755..ce4925bae125a 100644 --- a/libc/src/__support/FPUtil/FPBits.h +++ b/libc/src/__support/FPUtil/FPBits.h @@ -789,16 +789,16 @@ struct FPRep : public FPRepImpl> { // Returns the FPType corresponding to C++ type T on the host. 
template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() { using UnqualT = cpp::remove_cv_t<T>; - if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24) + if constexpr (cpp::is_same_v<UnqualT, float> && FLT_MANT_DIG == 24) return FPType::IEEE754_Binary32; - else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53) + else if constexpr (cpp::is_same_v<UnqualT, double> && DBL_MANT_DIG == 53) return FPType::IEEE754_Binary64; else if constexpr (cpp::is_same_v<UnqualT, long double>) { - if constexpr (__LDBL_MANT_DIG__ == 53) + if constexpr (LDBL_MANT_DIG == 53) return FPType::IEEE754_Binary64; - else if constexpr (__LDBL_MANT_DIG__ == 64) + else if constexpr (LDBL_MANT_DIG == 64) return FPType::X86_Binary80; - else if constexpr (__LDBL_MANT_DIG__ == 113) + else if constexpr (LDBL_MANT_DIG == 113) return FPType::IEEE754_Binary128; } #if defined(LIBC_TYPES_HAS_FLOAT16) diff --git a/libc/src/__support/endian_internal.h b/libc/src/__support/endian_internal.h index c78090ad85e05..4ac8709625d3a 100644 --- a/libc/src/__support/endian_internal.h +++ b/libc/src/__support/endian_internal.h @@ -16,13 +16,49 @@ namespace LIBC_NAMESPACE_DECL { // We rely on compiler preprocessor defines to allow for cross compilation. +#ifdef LIBC_COMPILER_IS_MSVC +#define __BYTE_ORDER__ 0 +#define __ORDER_LITTLE_ENDIAN__ 0 +#define __ORDER_BIG_ENDIAN__ 1 +#else // !LIBC_COMPILER_IS_MSVC #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \ !defined(__ORDER_BIG_ENDIAN__) #error "Missing preprocessor definitions for endianness detection."
#endif +#endif // LIBC_COMPILER_IS_MSVC namespace internal { +template <typename T> LIBC_INLINE T byte_swap(T value); + +template <> LIBC_INLINE uint16_t byte_swap<uint16_t>(uint16_t value) { +#if __has_builtin(__builtin_bswap16) + return __builtin_bswap16(value); +#else + return (value << 8) | (value >> 8); +#endif // __builtin_bswap16 +} + +template <> LIBC_INLINE uint32_t byte_swap<uint32_t>(uint32_t value) { +#if __has_builtin(__builtin_bswap32) + return __builtin_bswap32(value); +#else + return byte_swap<uint16_t>(static_cast<uint16_t>(value >> 16)) | + (static_cast<uint32_t>(byte_swap<uint16_t>(static_cast<uint16_t>(value))) + << 16); +#endif // __builtin_bswap32 +} + +template <> LIBC_INLINE uint64_t byte_swap<uint64_t>(uint64_t value) { +#if __has_builtin(__builtin_bswap64) + return __builtin_bswap64(value); +#else + return byte_swap<uint32_t>(static_cast<uint32_t>(value >> 32)) | + (static_cast<uint64_t>(byte_swap<uint32_t>(static_cast<uint32_t>(value))) + << 32); +#endif // __builtin_bswap64 +} + // Converts uint8_t, uint16_t, uint32_t, uint64_t to its big or little endian // counterpart. // We use explicit template specialization: @@ -53,7 +89,7 @@ template <> template <> LIBC_INLINE uint16_t Endian<__ORDER_LITTLE_ENDIAN__>::to_big_endian(uint16_t v) { - return __builtin_bswap16(v); + return byte_swap(v); } template <> template <> @@ -65,7 +101,7 @@ template <> template <> LIBC_INLINE uint32_t Endian<__ORDER_LITTLE_ENDIAN__>::to_big_endian(uint32_t v) { - return __builtin_bswap32(v); + return byte_swap(v); } template <> template <> @@ -77,7 +113,7 @@ template <> template <> LIBC_INLINE uint64_t Endian<__ORDER_LITTLE_ENDIAN__>::to_big_endian(uint64_t v) { - return __builtin_bswap64(v); + return byte_swap(v); } template <> template <> @@ -109,7 +145,7 @@ template <> template <> LIBC_INLINE uint16_t Endian<__ORDER_BIG_ENDIAN__>::to_little_endian(uint16_t v) { - return __builtin_bswap16(v); + return byte_swap(v); } template <> template <> @@ -121,7 +157,7 @@ template <> template <> LIBC_INLINE uint32_t Endian<__ORDER_BIG_ENDIAN__>::to_little_endian(uint32_t v) { - return __builtin_bswap32(v); + return byte_swap(v); }
template <> template <> @@ -133,7 +169,7 @@ template <> template <> LIBC_INLINE uint64_t Endian<__ORDER_BIG_ENDIAN__>::to_little_endian(uint64_t v) { - return __builtin_bswap64(v); + return byte_swap(v); } } // namespace internal diff --git a/libc/src/__support/macros/config.h b/libc/src/__support/macros/config.h index 685188893e7b7..501a816d49631 100644 --- a/libc/src/__support/macros/config.h +++ b/libc/src/__support/macros/config.h @@ -44,6 +44,7 @@ #endif #define __builtin_expect(value, expectation) (value) +#define __builtin_unreachable() __assume(0) #endif // LIBC_COMPILER_IS_MSVC diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index 954bcb1b6ef89..d4dc6dcb4acf6 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -55,13 +55,23 @@ mask_leading_zeros() { // Returns whether 'a + b' overflows, the result is stored in 'res'. template <typename T> [[nodiscard]] LIBC_INLINE constexpr bool add_overflow(T a, T b, T &res) { +#if __has_builtin(__builtin_add_overflow) return __builtin_add_overflow(a, b, &res); +#else + res = a + b; + return (res < a) || (res < b); +#endif // __builtin_add_overflow } // Returns whether 'a - b' overflows, the result is stored in 'res'. template <typename T> [[nodiscard]] LIBC_INLINE constexpr bool sub_overflow(T a, T b, T &res) { +#if __has_builtin(__builtin_sub_overflow) return __builtin_sub_overflow(a, b, &res); +#else + res = a - b; + return (res > a); +#endif // __builtin_sub_overflow } #define RETURN_IF(TYPE, BUILTIN) \ From 30010f49ca4de03762a31202c82e9c763d4a8822 Mon Sep 17 00:00:00 2001 From: Tony Varghese Date: Fri, 12 Sep 2025 09:36:37 +0530 Subject: [PATCH 070/734] [NFC][PowerPC] Pre-commit testcases for locking down the xxsel instructions for ternary(A, X, eqv(B,C)), ternary(A, X, not(C)), ternary(A, X, not(B)), ternary(A, X, nand(B,C)) and ternary(A, X, nor(B,C)) patterns (#158091) Pre-commit test case for exploitation of `xxsel` for ternary operations of the pattern.
This adds support for v4i32, v2i64, v16i8 and v8i16 operand types for the following patterns. The following are the patterns involved in the change: ``` ternary(A, and(B,C), nor(B,C)) ternary(A, B, nor(B,C)) ternary(A, C, nor(B,C)) ternary(A, xor(B,C), nor(B,C)) ternary(A, not(C), nor(B,C)) ternary(A, not(B), nor(B,C)) ternary(A, nand(B,C), nor(B,C)) ternary(A, or(B,C), eqv(B,C)) ternary(A, nor(B,C), eqv(B,C)) ternary(A, not(C), eqv(B,C)) ternary(A, nand(B,C), eqv(B,C)) ternary(A, and(B,C), not(C)) ternary(A, B, not(C)) ternary(A, xor(B,C), not(C)) ternary(A, or(B,C), not(C)) ternary(A, not(B), not(C)) ternary(A, nand(B,C), not(C)) ternary(A, and(B,C), not(B)) ternary(A, xor(B,C), not(B)) ternary(A, or(B,C), not(B)) ternary(A, nand(B,C), not(B)) ternary(A, B, nand(B,C)) ternary(A, C, nand(B,C)) ternary(A, xor(B,C), nand(B,C)) ternary(A, or(B,C), nand(B,C)) ternary(A, eqv(B,C), nand(B,C)) ``` Exploitation of `xxeval` for the above patterns to be added as a follow up. Co-authored-by: Tony Varghese --- .../CodeGen/PowerPC/xxeval-vselect-x-eqv.ll | 327 +++++++++++ .../CodeGen/PowerPC/xxeval-vselect-x-nand.ll | 384 +++++++++++++ .../CodeGen/PowerPC/xxeval-vselect-x-nor.ll | 538 ++++++++++++++++++ .../CodeGen/PowerPC/xxeval-vselect-x-not-b.ll | 307 ++++++++++ .../CodeGen/PowerPC/xxeval-vselect-x-not-c.ll | 445 +++++++++++++++ 5 files changed, 2001 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll create mode 100644 llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll create mode 100644 llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll create mode 100644 llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll create mode 100644 llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll new file mode 100644 index 0000000000000..7fa576f599dc4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll @@ -0,0 +1,327 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; Function to test ternary(A, or(B, C), eqv(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_or_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_or_BC_eqv_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <4 x i32> %B, %C + %xor = xor <4 x i32> %B, %C + %eqv = xor <4 x i32> %xor, ; Vector eqv operation + %res = select <4 x i1> %A, <4 x i32> %or, <4 x i32> %eqv + ret <4 x i32> %res +} + +; Function to test ternary(A, or(B, C), eqv(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_or_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_or_BC_eqv_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <2 x i64> %B, %C + %xor = xor <2 x i64> %B, %C + %eqv = xor <2 x i64> %xor, ; Vector eqv operation + %res = select <2 x i1> %A, <2 x i64> %or, <2 x i64> %eqv + ret <2 x i64> 
%res +} + +; Function to test ternary(A, or(B, C), eqv(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_or_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_or_BC_eqv_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <16 x i8> %B, %C + %xor = xor <16 x i8> %B, %C + %eqv = xor <16 x i8> %xor, ; Vector eqv operation + %res = select <16 x i1> %A, <16 x i8> %or, <16 x i8> %eqv + ret <16 x i8> %res +} + +; Function to test ternary(A, or(B, C), eqv(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_or_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_or_BC_eqv_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <8 x i16> %B, %C + %xor = xor <8 x i16> %B, %C + %eqv = xor <8 x i16> %xor, ; Vector eqv operation + %res = select <8 x i1> %A, <8 x i16> %or, <8 x i16> %eqv + ret <8 x i16> %res +} + +; Function to test ternary(A, nor(B, C), eqv(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; Vector NOR operation + %xor = xor <4 x i32> %B, %C + %eqv = xor <4 x i32> %xor, ; Vector eqv operation + %res = select <4 x i1> %A, <4 x i32> %nor, <4 x i32> %eqv + 
ret <4 x i32> %res +} + +; Function to test ternary(A, nor(B, C), eqv(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; Vector NOR operation + %xor = xor <2 x i64> %B, %C + %eqv = xor <2 x i64> %xor, ; Vector eqv operation + %res = select <2 x i1> %A, <2 x i64> %nor, <2 x i64> %eqv + ret <2 x i64> %res +} + +; Function to test ternary(A, nor(B, C), eqv(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %xor = xor <16 x i8> %B, %C + %eqv = xor <16 x i8> %xor, ; Vector eqv operation + %res = select <16 x i1> %A, <16 x i8> %nor, <16 x i8> %eqv + ret <16 x i8> %res +} + +; Function to test ternary(A, nor(B, C), eqv(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; 
Vector NOR operation + %xor = xor <8 x i16> %B, %C + %eqv = xor <8 x i16> %xor, ; Vector eqv operation + %res = select <8 x i1> %A, <8 x i16> %nor, <8 x i16> %eqv + ret <8 x i16> %res +} + +; Function to test ternary(A, not(C), eqv(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_not_C_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_not_C_eqv_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxleqv vs1, v4, v3 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <4 x i32> %C, ; Vector not operation + %xor = xor <4 x i32> %B, %C + %eqv = xor <4 x i32> %xor, ; Vector eqv operation + %res = select <4 x i1> %A, <4 x i32> %not, <4 x i32> %eqv + ret <4 x i32> %res +} + +; Function to test ternary(A, not(C), eqv(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_not_C_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_not_C_eqv_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxleqv vs1, v4, v3 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <2 x i64> %C, ; Vector not operation + %xor = xor <2 x i64> %B, %C + %eqv = xor <2 x i64> %xor, ; Vector eqv operation + %res = select <2 x i1> %A, <2 x i64> %not, <2 x i64> %eqv + ret <2 x i64> %res +} + +; Function to test ternary(A, not(C), eqv(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_not_C_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_not_C_eqv_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxleqv vs1, v4, v3 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, 
v2 +; CHECK-NEXT: blr +entry: + %not = xor <16 x i8> %C, ; Vector not operation + %xor = xor <16 x i8> %B, %C + %eqv = xor <16 x i8> %xor, ; Vector eqv operation + %res = select <16 x i1> %A, <16 x i8> %not, <16 x i8> %eqv + ret <16 x i8> %res +} + +; Function to test ternary(A, not(C), eqv(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_not_C_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_not_C_eqv_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxleqv vs1, v4, v3 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <8 x i16> %C, ; Vector not operation + %xor = xor <8 x i16> %B, %C + %eqv = xor <8 x i16> %xor, ; Vector eqv operation + %res = select <8 x i1> %A, <8 x i16> %not, <8 x i16> %eqv + ret <8 x i16> %res +} + +; Function to test ternary(A, nand(B, C), eqv(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_nand_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %xor = xor <4 x i32> %B, %C + %eqv = xor <4 x i32> %xor, ; Vector eqv operation + %res = select <4 x i1> %A, <4 x i32> %nand, <4 x i32> %eqv + ret <4 x i32> %res +} + +; Function to test ternary(A, nand(B, C), eqv(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_nand_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: 
xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %xor = xor <2 x i64> %B, %C + %eqv = xor <2 x i64> %xor, ; Vector eqv operation + %res = select <2 x i1> %A, <2 x i64> %nand, <2 x i64> %eqv + ret <2 x i64> %res +} + +; Function to test ternary(A, nand(B, C), eqv(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_nand_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %xor = xor <16 x i8> %B, %C + %eqv = xor <16 x i8> %xor, ; Vector eqv operation + %res = select <16 x i1> %A, <16 x i8> %nand, <16 x i8> %eqv + ret <16 x i8> %res +} + +; Function to test ternary(A, nand(B, C), eqv(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_nand_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %xor = xor <8 x i16> %B, %C + %eqv = xor <8 x i16> %xor, ; Vector eqv operation + %res = select <8 x i1> %A, <8 x i16> %nand, <8 x i16> %eqv + ret <8 x i16> %res +} diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll new file mode 100644 index 
0000000000000..7a6733d3b5510 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll @@ -0,0 +1,384 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; Function to test ternary(A, B, nand(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_B_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_B_nand_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %res = select <4 x i1> %A, <4 x i32> %B, <4 x i32> %nand + ret <4 x i32> %res +} + +; Function to test ternary(A, B, nand(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_B_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_B_nand_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %res = select <2 x i1> %A, <2 x i64> %B, <2 x i64> %nand + ret <2 x i64> %res +} + +; 
Function to test ternary(A, B, nand(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_B_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_B_nand_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %res = select <16 x i1> %A, <16 x i8> %B, <16 x i8> %nand + ret <16 x i8> %res +} + +; Function to test ternary(A, B, nand(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_B_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_B_nand_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %res = select <8 x i1> %A, <8 x i16> %B, <8 x i16> %nand + ret <8 x i16> %res +} + +; Function to test ternary(A, C, nand(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_C_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_C_nand_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %res = select <4 x i1> %A, <4 x i32> %C, <4 x i32> %nand + ret <4 x i32> %res +} + +; Function to test ternary(A, C, nand(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_C_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_C_nand_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; 
CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %res = select <2 x i1> %A, <2 x i64> %C, <2 x i64> %nand + ret <2 x i64> %res +} + +; Function to test ternary(A, C, nand(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_C_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_C_nand_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %res = select <16 x i1> %A, <16 x i8> %C, <16 x i8> %nand + ret <16 x i8> %res +} + +; Function to test ternary(A, C, nand(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_C_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_C_nand_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %res = select <8 x i1> %A, <8 x i16> %C, <8 x i16> %nand + ret <8 x i16> %res +} + +; Function to test ternary(A, xor(B, C), nand(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_xor_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nand_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor 
= xor <4 x i32> %B, %C + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %res = select <4 x i1> %A, <4 x i32> %xor, <4 x i32> %nand + ret <4 x i32> %res +} + +; Function to test ternary(A, xor(B, C), nand(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_xor_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nand_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <2 x i64> %B, %C + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %res = select <2 x i1> %A, <2 x i64> %xor, <2 x i64> %nand + ret <2 x i64> %res +} + +; Function to test ternary(A, xor(B, C), nand(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_xor_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nand_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <16 x i8> %B, %C + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %res = select <16 x i1> %A, <16 x i8> %xor, <16 x i8> %nand + ret <16 x i8> %res +} + +; Function to test ternary(A, xor(B, C), nand(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_xor_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nand_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, 
v2 +; CHECK-NEXT: blr +entry: + %xor = xor <8 x i16> %B, %C + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %res = select <8 x i1> %A, <8 x i16> %xor, <8 x i16> %nand + ret <8 x i16> %res +} + +; Function to test ternary(A, or(B, C), nand(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_or_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_or_BC_nand_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <4 x i32> %B, %C + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %res = select <4 x i1> %A, <4 x i32> %or, <4 x i32> %nand + ret <4 x i32> %res +} + +; Function to test ternary(A, or(B, C), nand(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_or_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_or_BC_nand_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <2 x i64> %B, %C + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %res = select <2 x i1> %A, <2 x i64> %or, <2 x i64> %nand + ret <2 x i64> %res +} + +; Function to test ternary(A, or(B, C), nand(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_or_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_or_BC_nand_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: 
xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <16 x i8> %B, %C + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %res = select <16 x i1> %A, <16 x i8> %or, <16 x i8> %nand + ret <16 x i8> %res +} + +; Function to test ternary(A, or(B, C), nand(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_or_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_or_BC_nand_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <8 x i16> %B, %C + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %res = select <8 x i1> %A, <8 x i16> %or, <8 x i16> %nand + ret <8 x i16> %res +} + +; Function to test ternary(A, eqv(B, C), nand(B, C)) for <4 x i32> +define <4 x i32> @ternary_A_eqv_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <4 x i32> %B, %C + %eqv = xor <4 x i32> %xor, ; Vector eqv operation + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %res = select <4 x i1> %A, <4 x i32> %eqv, <4 x i32> %nand + ret <4 x i32> %res +} + +; Function to test ternary(A, eqv(B, C), nand(B, C)) for <2 x i64> +define <2 x i64> @ternary_A_eqv_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 
63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <2 x i64> %B, %C + %eqv = xor <2 x i64> %xor, ; Vector eqv operation + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %res = select <2 x i1> %A, <2 x i64> %eqv, <2 x i64> %nand + ret <2 x i64> %res +} + +; Function to test ternary(A, eqv(B, C), nand(B, C)) for <16 x i8> +define <16 x i8> @ternary_A_eqv_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <16 x i8> %B, %C + %eqv = xor <16 x i8> %xor, ; Vector eqv operation + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %res = select <16 x i1> %A, <16 x i8> %eqv, <16 x i8> %nand + ret <16 x i8> %res +} + +; Function to test ternary(A, eqv(B, C), nand(B, C)) for <8 x i16> +define <8 x i16> @ternary_A_eqv_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, v3, v4 +; CHECK-NEXT: xxlnand vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <8 x i16> %B, %C + %eqv = xor <8 x i16> %xor, ; Vector eqv operation + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %res = select <8 x i1> %A, <8 x i16> %eqv, <8 x i16> %nand + ret <8 x i16> %res +} diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll new file mode 100644 index 0000000000000..d635952e5d8f2 --- 
/dev/null +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll @@ -0,0 +1,538 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test file to verify the emission of Vector selection instructions when ternary operators are used. + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; Function to test ternary(A, and(B, C), nor(B,C)) for <4 x i32> +define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; Vector NOR operation + %res = select <4 x i1> %A, <4 x i32> %and, <4 x i32> %nor + ret <4 x i32> %res +} + +; Function to test ternary(A, and(B, C), nor(B,C)) for <2 x i64> +define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; 
Vector NOR operation + %res = select <2 x i1> %A, <2 x i64> %and, <2 x i64> %nor + ret <2 x i64> %res +} + +; Function to test ternary(A, and(B, C), nor(B,C)) for <16 x i8> +define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %res = select <16 x i1> %A, <16 x i8> %and, <16 x i8> %nor + ret <16 x i8> %res +} + +; Function to test ternary(A, and(B, C), nor(B,C)) for <8 x i16> +define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %or = or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; Vector NOR operation + %res = select <8 x i1> %A, <8 x i16> %and, <8 x i16> %nor + ret <8 x i16> %res +} + +; Function to test ternary(A, B, nor(B,C)) for <4 x i32> +define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_B_nor_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; Vector NOR operation + %res = select <4 x i1> %A, <4 x i32> %B, <4 x i32> %nor + ret <4 x i32> %res +} + +; Function 
to test ternary(A, B, nor(B,C)) for <2 x i64> +define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_B_nor_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; Vector NOR operation + %res = select <2 x i1> %A, <2 x i64> %B, <2 x i64> %nor + ret <2 x i64> %res +} + +; Function to test ternary(A, B, nor(B,C)) for <16 x i8> +define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_B_nor_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %res = select <16 x i1> %A, <16 x i8> %B, <16 x i8> %nor + ret <16 x i8> %res +} + +; Function to test ternary(A, B, nor(B,C)) for <8 x i16> +define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_B_nor_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %or = or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; Vector NOR operation + %res = select <8 x i1> %A, <8 x i16> %B, <8 x i16> %nor + ret <8 x i16> %res +} + +; Function to test ternary(A, C, nor(B,C)) for <4 x i32> +define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_C_nor_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: 
xxlnor vs0, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; Vector NOR operation + %res = select <4 x i1> %A, <4 x i32> %C, <4 x i32> %nor + ret <4 x i32> %res +} + +; Function to test ternary(A, C, nor(B,C)) for <2 x i64> +define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_C_nor_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; Vector NOR operation + %res = select <2 x i1> %A, <2 x i64> %C, <2 x i64> %nor + ret <2 x i64> %res +} + +; Function to test ternary(A, C, nor(B,C)) for <16 x i8> +define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_C_nor_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %res = select <16 x i1> %A, <16 x i8> %C, <16 x i8> %nor + ret <16 x i8> %res +} + +; Function to test ternary(A, C, nor(B,C)) for <8 x i16> +define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_C_nor_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: blr +entry: + %or = or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; Vector NOR operation + %res = select <8 x i1> 
%A, <8 x i16> %C, <8 x i16> %nor + ret <8 x i16> %res +} + +; Function to test ternary(A, xor(B,C), nor(B,C)) for <4 x i32> +define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <4 x i32> %B, %C + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; Vector NOR operation + %res = select <4 x i1> %A, <4 x i32> %xor, <4 x i32> %nor + ret <4 x i32> %res +} + +; Function to test ternary(A, xor(B,C), nor(B,C)) for <2 x i64> +define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <2 x i64> %B, %C + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; Vector NOR operation + %res = select <2 x i1> %A, <2 x i64> %xor, <2 x i64> %nor + ret <2 x i64> %res +} + +; Function to test ternary(A, xor(B,C), nor(B,C)) for <16 x i8> +define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <16 x i8> %B, %C + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %res = select <16 x i1> %A, 
<16 x i8> %xor, <16 x i8> %nor + ret <16 x i8> %res +} + +; Function to test ternary(A, xor(B,C), nor(B,C)) for <8 x i16> +define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <8 x i16> %B, %C + %or = or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; Vector NOR operation + %res = select <8 x i1> %A, <8 x i16> %xor, <8 x i16> %nor + ret <8 x i16> %res +} + +; Function to test ternary(A, not(C), nor(B,C)) for <4 x i32> +define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <4 x i32> %C, ; Vector not operation + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; Vector NOR operation + %res = select <4 x i1> %A, <4 x i32> %not, <4 x i32> %nor + ret <4 x i32> %res +} + +; Function to test ternary(A, not(C), nor(B,C)) for <2 x i64> +define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <2 x i64> %C, ; Vector not operation + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; Vector NOR 
operation + %res = select <2 x i1> %A, <2 x i64> %not, <2 x i64> %nor + ret <2 x i64> %res +} + +; Function to test ternary(A, not(C), nor(B,C)) for <16 x i8> +define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <16 x i8> %C, ; Vector not operation + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %res = select <16 x i1> %A, <16 x i8> %not, <16 x i8> %nor + ret <16 x i8> %res +} + +; Function to test ternary(A, not(C), nor(B,C)) for <8 x i16> +define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <8 x i16> %C, ; Vector not operation + %or = or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; Vector NOR operation + %res = select <8 x i1> %A, <8 x i16> %not, <8 x i16> %nor + ret <8 x i16> %res +} + +; Function to test ternary(A, not(B), nor(B,C)) for <4 x i32> +define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <4 x i32> %B, ; Vector not operation + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; 
Vector NOR operation + %res = select <4 x i1> %A, <4 x i32> %not, <4 x i32> %nor + ret <4 x i32> %res +} + +; Function to test ternary(A, not(B), nor(B,C)) for <2 x i64> +define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <2 x i64> %B, ; Vector not operation + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; Vector NOR operation + %res = select <2 x i1> %A, <2 x i64> %not, <2 x i64> %nor + ret <2 x i64> %res +} + +; Function to test ternary(A, not(B), nor(B,C)) for <16 x i8> +define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <16 x i8> %B, ; Vector not operation + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %res = select <16 x i1> %A, <16 x i8> %not, <16 x i8> %nor + ret <16 x i8> %res +} + +; Function to test ternary(A, not(B), nor(B,C)) for <8 x i16> +define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not = xor <8 x i16> %B, ; Vector not operation + %or = 
or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; Vector NOR operation + %res = select <8 x i1> %A, <8 x i16> %not, <8 x i16> %nor + ret <8 x i16> %res +} + +; Function to test ternary(A, nand(B,C), nor(B,C)) for <4 x i32> +define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %or = or <4 x i32> %B, %C + %nor = xor <4 x i32> %or, ; Vector NOR operation + %res = select <4 x i1> %A, <4 x i32> %nand, <4 x i32> %nor + ret <4 x i32> %res +} + +; Function to test ternary(A, nand(B,C), nor(B,C)) for <2 x i64> +define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %or = or <2 x i64> %B, %C + %nor = xor <2 x i64> %or, ; Vector NOR operation + %res = select <2 x i1> %A, <2 x i64> %nand, <2 x i64> %nor + ret <2 x i64> %res +} + +; Function to test ternary(A, nand(B,C), nor(B,C)) for <16 x i8> +define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: 
vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %or = or <16 x i8> %B, %C + %nor = xor <16 x i8> %or, ; Vector NOR operation + %res = select <16 x i1> %A, <16 x i8> %nand, <16 x i8> %nor + ret <16 x i8> %res +} + +; Function to test ternary(A, nand(B,C), nor(B,C)) for <8 x i16> +define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %or = or <8 x i16> %B, %C + %nor = xor <8 x i16> %or, ; Vector NOR operation + %res = select <8 x i1> %A, <8 x i16> %nand, <8 x i16> %nor + ret <8 x i16> %res +} diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll new file mode 100644 index 0000000000000..6203a96555395 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. 
+ +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; Function to test ternary(A, and(B, C), not(B)) for <4 x i32> +define <4 x i32> @ternary_A_and_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_B_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %not = xor <4 x i32> %B, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %and, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, and(B, C), not(B)) for <2 x i64> +define <2 x i64> @ternary_A_and_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_B_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %not = xor <2 x i64> %B, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %and, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, and(B, C), not(B)) for <16 x i8> +define <16 x i8> @ternary_A_and_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_B_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
xxspltib v5, 7 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %not = xor <16 x i8> %B, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %and, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, and(B, C), not(B)) for <8 x i16> +define <8 x i16> @ternary_A_and_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_B_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %not = xor <8 x i16> %B, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %and, <8 x i16> %not + ret <8 x i16> %res +} + +; Function to test ternary(A, xor(B, C), not(B)) for <4 x i32> +define <4 x i32> @ternary_A_xor_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_xor_BC_not_B_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <4 x i32> %B, %C + %not = xor <4 x i32> %B, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %xor, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, xor(B, C), not(B)) for <2 x i64> +define <2 x i64> @ternary_A_xor_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_xor_BC_not_B_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; 
CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <2 x i64> %B, %C + %not = xor <2 x i64> %B, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %xor, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, xor(B, C), not(B)) for <16 x i8> +define <16 x i8> @ternary_A_xor_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_xor_BC_not_B_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <16 x i8> %B, %C + %not = xor <16 x i8> %B, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %xor, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, xor(B, C), not(B)) for <8 x i16> +define <8 x i16> @ternary_A_xor_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_xor_BC_not_B_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <8 x i16> %B, %C + %not = xor <8 x i16> %B, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %xor, <8 x i16> %not + ret <8 x i16> %res +} + +; Function to test ternary(A, or(B, C), not(B)) for <4 x i32> +define <4 x i32> @ternary_A_or_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_B_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or 
<4 x i32> %B, %C + %not = xor <4 x i32> %B, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %or, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, or(B, C), not(B)) for <2 x i64> +define <2 x i64> @ternary_A_or_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_B_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <2 x i64> %B, %C + %not = xor <2 x i64> %B, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %or, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, or(B, C), not(B)) for <16 x i8> +define <16 x i8> @ternary_A_or_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_B_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <16 x i8> %B, %C + %not = xor <16 x i8> %B, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %or, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, or(B, C), not(B)) for <8 x i16> +define <8 x i16> @ternary_A_or_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_B_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <8 x i16> %B, %C + %not = xor <8 x i16> %B, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %or, <8 x 
i16> %not + ret <8 x i16> %res +} + +; Function to test ternary(A, nand(B, C), not(B)) for <4 x i32> +define <4 x i32> @ternary_A_nand_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_B_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %not = xor <4 x i32> %B, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %nand, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, nand(B, C), not(B)) for <2 x i64> +define <2 x i64> @ternary_A_nand_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_B_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %not = xor <2 x i64> %B, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %nand, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, nand(B, C), not(B)) for <16 x i8> +define <16 x i8> @ternary_A_nand_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_B_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %not = xor <16 x i8> 
%B, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %nand, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, nand(B, C), not(B)) for <8 x i16> +define <8 x i16> @ternary_A_nand_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_B_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %not = xor <8 x i16> %B, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %nand, <8 x i16> %not + ret <8 x i16> %res +} diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll new file mode 100644 index 0000000000000..3479d949439be --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll @@ -0,0 +1,445 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. 
+ +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; Function to test ternary(A, and(B, C), not(C)) for <4 x i32> +define <4 x i32> @ternary_A_and_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_C_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %not = xor <4 x i32> %C, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %and, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, and(B, C), not(C)) for <2 x i64> +define <2 x i64> @ternary_A_and_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_C_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %not = xor <2 x i64> %C, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %and, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, and(B, C), not(C)) for <16 x i8> +define <16 x i8> @ternary_A_and_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_C_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
xxspltib v5, 7 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %not = xor <16 x i8> %C, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %and, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, and(B, C), not(C)) for <8 x i16> +define <8 x i16> @ternary_A_and_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_and_BC_not_C_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %not = xor <8 x i16> %C, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %and, <8 x i16> %not + ret <8 x i16> %res +} + +; Function to test ternary(A, B, not(C)) for <4 x i32> +define <4 x i32> @ternary_A_B_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_B_not_C_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %not = xor <4 x i32> %C, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %B, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, B, not(C)) for <2 x i64> +define <2 x i64> @ternary_A_B_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_B_not_C_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %not = xor <2 x 
i64> %C, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %B, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, B, not(C)) for <16 x i8> +define <16 x i8> @ternary_A_B_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_B_not_C_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %not = xor <16 x i8> %C, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %B, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, B, not(C)) for <8 x i16> +define <8 x i16> @ternary_A_B_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_B_not_C_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: blr +entry: + %not = xor <8 x i16> %C, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %B, <8 x i16> %not + ret <8 x i16> %res +} + +; Function to test ternary(A, xor(B, C), not(C)) for <4 x i32> +define <4 x i32> @ternary_A_xor_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_xor_BC_not_C_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <4 x i32> %B, %C + %not = xor <4 x i32> %C, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %xor, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, xor(B, C), not(C)) for <2 x i64> +define <2 x i64> @ternary_A_xor_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: 
ternary_A_xor_BC_not_C_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <2 x i64> %B, %C + %not = xor <2 x i64> %C, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %xor, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, xor(B, C), not(C)) for <16 x i8> +define <16 x i8> @ternary_A_xor_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_xor_BC_not_C_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <16 x i8> %B, %C + %not = xor <16 x i8> %C, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %xor, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, xor(B, C), not(C)) for <8 x i16> +define <8 x i16> @ternary_A_xor_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_xor_BC_not_C_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %xor = xor <8 x i16> %B, %C + %not = xor <8 x i16> %C, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %xor, <8 x i16> %not + ret <8 x i16> %res +} + +; Function to test ternary(A, or(B, C), not(C)) for <4 x i32> +define <4 x i32> @ternary_A_or_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_C_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; 
CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <4 x i32> %B, %C + %not = xor <4 x i32> %C, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %or, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, or(B, C), not(C)) for <2 x i64> +define <2 x i64> @ternary_A_or_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_C_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <2 x i64> %B, %C + %not = xor <2 x i64> %C, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %or, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, or(B, C), not(C)) for <16 x i8> +define <16 x i8> @ternary_A_or_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_C_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <16 x i8> %B, %C + %not = xor <16 x i8> %C, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %or, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, or(B, C), not(C)) for <8 x i16> +define <8 x i16> @ternary_A_or_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_or_BC_not_C_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah 
v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %or = or <8 x i16> %B, %C + %not = xor <8 x i16> %C, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %or, <8 x i16> %not + ret <8 x i16> %res +} + +; Function to test ternary(A, not(B), not(C)) for <4 x i32> +define <4 x i32> @ternary_A_not_B_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_not_B_not_C_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not_b = xor <4 x i32> %B, ; Vector not operation + %not_c = xor <4 x i32> %C, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %not_b, <4 x i32> %not_c + ret <4 x i32> %res +} + +; Function to test ternary(A, not(B), not(C)) for <2 x i64> +define <2 x i64> @ternary_A_not_B_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_not_B_not_C_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not_b = xor <2 x i64> %B, ; Vector not operation + %not_c = xor <2 x i64> %C, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %not_b, <2 x i64> %not_c + ret <2 x i64> %res +} + +; Function to test ternary(A, not(B), not(C)) for <16 x i8> +define <16 x i8> @ternary_A_not_B_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_not_B_not_C_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; 
CHECK-NEXT: blr +entry: + %not_b = xor <16 x i8> %B, ; Vector not operation + %not_c = xor <16 x i8> %C, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %not_b, <16 x i8> %not_c + ret <16 x i8> %res +} + +; Function to test ternary(A, not(B), not(C)) for <8 x i16> +define <8 x i16> @ternary_A_not_B_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_not_B_not_C_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %not_b = xor <8 x i16> %B, ; Vector not operation + %not_c = xor <8 x i16> %C, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %not_b, <8 x i16> %not_c + ret <8 x i16> %res +} + +; Function to test ternary(A, nand(B, C), not(C)) for <4 x i32> +define <4 x i32> @ternary_A_nand_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_C_4x32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: vsraw v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <4 x i32> %B, %C + %nand = xor <4 x i32> %and, ; Vector nand operation + %not = xor <4 x i32> %C, ; Vector not operation + %res = select <4 x i1> %A, <4 x i32> %nand, <4 x i32> %not + ret <4 x i32> %res +} + +; Function to test ternary(A, nand(B, C), not(C)) for <2 x i64> +define <2 x i64> @ternary_A_nand_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_C_2x64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: vsld v2, v2, v5 +; CHECK-NEXT: vsrad v2, v2, v5 +; CHECK-NEXT: xxsel 
v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <2 x i64> %B, %C + %nand = xor <2 x i64> %and, ; Vector nand operation + %not = xor <2 x i64> %C, ; Vector not operation + %res = select <2 x i1> %A, <2 x i64> %nand, <2 x i64> %not + ret <2 x i64> %res +} + +; Function to test ternary(A, nand(B, C), not(C)) for <16 x i8> +define <16 x i8> @ternary_A_nand_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_C_16x8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslb v2, v2, v5 +; CHECK-NEXT: vsrab v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <16 x i8> %B, %C + %nand = xor <16 x i8> %and, ; Vector nand operation + %not = xor <16 x i8> %C, ; Vector not operation + %res = select <16 x i1> %A, <16 x i8> %nand, <16 x i8> %not + ret <16 x i8> %res +} + +; Function to test ternary(A, nand(B, C), not(C)) for <8 x i16> +define <8 x i16> @ternary_A_nand_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C) { +; CHECK-LABEL: ternary_A_nand_BC_not_C_8x16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: vslh v2, v2, v5 +; CHECK-NEXT: vsrah v2, v2, v5 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: blr +entry: + %and = and <8 x i16> %B, %C + %nand = xor <8 x i16> %and, ; Vector nand operation + %not = xor <8 x i16> %C, ; Vector not operation + %res = select <8 x i1> %A, <8 x i16> %nand, <8 x i16> %not + ret <8 x i16> %res +} From 092de9bb90cbcee445b31e504a4c2a09ecf09714 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Fri, 12 Sep 2025 00:21:18 -0400 Subject: [PATCH 071/734] [InstCombine] Enable FAdd simplifications when user can ignore sign bit (#157757) When FAdd result is used by fabs, we can safely ignore the sign bit of fp zero. 
This patch enables an instruction simplification optimization that folds fadd x, 0 ==> x, which would otherwise not work as the compiler cannot prove that the zero isn't -0. But if the result of the fadd is used by fabs we can simply ignore this and still do the optimization. Fixes #154238 --- .../InstCombine/InstCombineAddSub.cpp | 20 ++++++++++ .../fold-fadd-with-zero-gh154238.ll | 37 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 71c53e37c7757..726d09aa26941 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2002,6 +2002,16 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) { if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I)) return FoldedFAdd; + // B = fadd A, 0.0 + // Z = Op B + // can be transformed into + // Z = Op A + // Where Op is such that we can ignore sign of 0 in fadd + Value *A; + if (match(&I, m_OneUse(m_FAdd(m_Value(A), m_AnyZeroFP()))) && + canIgnoreSignBitOfZero(*I.use_begin())) + return replaceInstUsesWith(I, A); + // (-X) + Y --> Y - X Value *X, *Y; if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y)))) @@ -3145,6 +3155,16 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { Value *X, *Y; Constant *C; + // B = fsub A, 0.0 + // Z = Op B + // can be transformed into + // Z = Op A + // Where Op is such that we can ignore sign of 0 in fsub + Value *A; + if (match(&I, m_OneUse(m_FSub(m_Value(A), m_AnyZeroFP()))) && + canIgnoreSignBitOfZero(*I.use_begin())) + return replaceInstUsesWith(I, A); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X) // Canonicalize to fadd to make analysis easier. 
diff --git a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll new file mode 100644 index 0000000000000..b9d951dc2945a --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s +define float @src(float %arg1) { +; CHECK-LABEL: define float @src( +; CHECK-SAME: float [[ARG1:%.*]]) { +; CHECK-NEXT: [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]]) +; CHECK-NEXT: ret float [[V3]] +; + %v2 = fadd float %arg1, 0.000000e+00 + %v3 = call float @llvm.fabs.f32(float %v2) + ret float %v3 +} + +define float @src2(float %arg1) { +; CHECK-LABEL: define float @src2( +; CHECK-SAME: float [[ARG1:%.*]]) { +; CHECK-NEXT: [[V2:%.*]] = fadd float [[ARG1]], 0.000000e+00 +; CHECK-NEXT: [[V3:%.*]] = call float @llvm.fabs.f32(float [[V2]]) +; CHECK-NEXT: [[V4:%.*]] = fsub float [[V2]], [[V3]] +; CHECK-NEXT: ret float [[V4]] +; + %v2 = fadd float %arg1, 0.000000e+00 + %v3 = call float @llvm.fabs.f32(float %v2) + %v4 = fsub float %v2, %v3 + ret float %v4 +} + +define float @src_sub(float %arg1) { +; CHECK-LABEL: define float @src_sub( +; CHECK-SAME: float [[ARG1:%.*]]) { +; CHECK-NEXT: [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]]) +; CHECK-NEXT: ret float [[V3]] +; + %v2 = fsub float %arg1, 0.000000e+00 + %v3 = call float @llvm.fabs.f32(float %v2) + ret float %v3 +} From d5aa5e33e6fd7c0d4035be5e7877f275b3205a36 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 11 Sep 2025 21:27:54 -0700 Subject: [PATCH 072/734] [libc] Update errno usage in integration tests. (#158147) Instead of using libc_errno directly, include and use regular "errno" in the code. (to verify that errno-as-an-interface works properly). 
This is the recipe prescribed in the libc/src/__support/libc_errno.h header - let's actually follow it in the integration tests. --- libc/test/IntegrationTest/test.h | 6 +++--- libc/test/integration/src/pthread/pthread_create_test.cpp | 5 ++--- libc/test/integration/src/pthread/pthread_join_test.cpp | 6 +++--- libc/test/integration/src/pthread/pthread_name_test.cpp | 2 +- libc/test/integration/src/unistd/getcwd_test.cpp | 7 +++---- libc/test/integration/startup/linux/tls_test.cpp | 2 +- 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/libc/test/IntegrationTest/test.h b/libc/test/IntegrationTest/test.h index 24c007d2e12e6..4a03f7aa6318b 100644 --- a/libc/test/IntegrationTest/test.h +++ b/libc/test/IntegrationTest/test.h @@ -68,9 +68,9 @@ //////////////////////////////////////////////////////////////////////////////// // Errno checks. -#define ASSERT_ERRNO_EQ(VAL) ASSERT_EQ(VAL, static_cast(libc_errno)) -#define ASSERT_ERRNO_SUCCESS() ASSERT_EQ(0, static_cast(libc_errno)) -#define ASSERT_ERRNO_FAILURE() ASSERT_NE(0, static_cast(libc_errno)) +#define ASSERT_ERRNO_EQ(VAL) ASSERT_EQ(VAL, static_cast(errno)) +#define ASSERT_ERRNO_SUCCESS() ASSERT_EQ(0, static_cast(errno)) +#define ASSERT_ERRNO_FAILURE() ASSERT_NE(0, static_cast(errno)) // Integration tests are compiled with -ffreestanding which stops treating // the main function as a non-overloadable special function. Hence, we use a diff --git a/libc/test/integration/src/pthread/pthread_create_test.cpp b/libc/test/integration/src/pthread/pthread_create_test.cpp index aecbad6514aaa..abd348e707c09 100644 --- a/libc/test/integration/src/pthread/pthread_create_test.cpp +++ b/libc/test/integration/src/pthread/pthread_create_test.cpp @@ -29,10 +29,9 @@ #include "src/__support/CPP/new.h" #include "src/__support/threads/thread.h" -#include "src/__support/libc_errno.h" - #include "test/IntegrationTest/test.h" +#include #include // For EXEC_PAGESIZE. 
#include @@ -332,7 +331,7 @@ static void run_failure_tests() { } TEST_MAIN() { - libc_errno = 0; + errno = 0; run_success_tests(); run_failure_tests(); return 0; diff --git a/libc/test/integration/src/pthread/pthread_join_test.cpp b/libc/test/integration/src/pthread/pthread_join_test.cpp index 5d0bcd8e23658..6dea99de1a64f 100644 --- a/libc/test/integration/src/pthread/pthread_join_test.cpp +++ b/libc/test/integration/src/pthread/pthread_join_test.cpp @@ -9,9 +9,9 @@ #include "src/pthread/pthread_create.h" #include "src/pthread/pthread_join.h" -#include "src/__support/libc_errno.h" - #include "test/IntegrationTest/test.h" + +#include #include static void *simpleFunc(void *) { return nullptr; } @@ -25,7 +25,7 @@ static void nullJoinTest() { } TEST_MAIN() { - libc_errno = 0; + errno = 0; nullJoinTest(); return 0; } diff --git a/libc/test/integration/src/pthread/pthread_name_test.cpp b/libc/test/integration/src/pthread/pthread_name_test.cpp index 343a22356593a..d2a5ffc544ec9 100644 --- a/libc/test/integration/src/pthread/pthread_name_test.cpp +++ b/libc/test/integration/src/pthread/pthread_name_test.cpp @@ -8,7 +8,6 @@ #include "hdr/stdint_proxy.h" // uintptr_t #include "src/__support/CPP/string_view.h" -#include "src/__support/libc_errno.h" #include "src/pthread/pthread_create.h" #include "src/pthread/pthread_getname_np.h" #include "src/pthread/pthread_join.h" @@ -20,6 +19,7 @@ #include "src/pthread/pthread_setname_np.h" #include "test/IntegrationTest/test.h" +#include #include using string_view = LIBC_NAMESPACE::cpp::string_view; diff --git a/libc/test/integration/src/unistd/getcwd_test.cpp b/libc/test/integration/src/unistd/getcwd_test.cpp index 1b321b01e9315..7b87a8f0ed41c 100644 --- a/libc/test/integration/src/unistd/getcwd_test.cpp +++ b/libc/test/integration/src/unistd/getcwd_test.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/__support/CPP/string_view.h" -#include "src/__support/libc_errno.h" 
#include "src/stdlib/getenv.h" #include "src/unistd/getcwd.h" #include "test/IntegrationTest/test.h" +#include #include // For malloc and free using LIBC_NAMESPACE::cpp::string_view; @@ -31,13 +31,12 @@ TEST_MAIN(int argc, char **argv, char **envp) { cwd = LIBC_NAMESPACE::getcwd(buffer, 0); ASSERT_TRUE(cwd == nullptr); ASSERT_ERRNO_EQ(EINVAL); - libc_errno = 0; // Insufficient size + errno = 0; cwd = LIBC_NAMESPACE::getcwd(buffer, 2); ASSERT_TRUE(cwd == nullptr); - int err = libc_errno; - ASSERT_EQ(err, ERANGE); + ASSERT_ERRNO_EQ(ERANGE); return 0; } diff --git a/libc/test/integration/startup/linux/tls_test.cpp b/libc/test/integration/startup/linux/tls_test.cpp index de3bd06c39cf6..688a94bdeb6fb 100644 --- a/libc/test/integration/startup/linux/tls_test.cpp +++ b/libc/test/integration/startup/linux/tls_test.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/libc_errno.h" #include "src/sys/mman/mmap.h" #include "test/IntegrationTest/test.h" +#include #include constexpr int threadLocalDataSize = 101; From aef2f41f3fdc12d40db244e61fbe812db5612dda Mon Sep 17 00:00:00 2001 From: Victor Chernyakin Date: Thu, 11 Sep 2025 22:04:56 -0700 Subject: [PATCH 073/734] [clang-tidy][NFC] Enable `performance-type-promotion-in-math-fn` check in the codebase (#158186) Closes #156155. 
--- clang-tools-extra/clang-tidy/.clang-tidy | 1 - .../clang-tidy/altera/StructPackAlignCheck.cpp | 4 ++-- .../clang-tidy/altera/UnrollLoopsCheck.cpp | 14 ++++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/clang-tools-extra/clang-tidy/.clang-tidy b/clang-tools-extra/clang-tidy/.clang-tidy index ae554c6668a84..d290901730405 100644 --- a/clang-tools-extra/clang-tidy/.clang-tidy +++ b/clang-tools-extra/clang-tidy/.clang-tidy @@ -16,7 +16,6 @@ Checks: > performance-*, -performance-enum-size, -performance-no-int-to-ptr, - -performance-type-promotion-in-math-fn, -performance-unnecessary-value-param, readability-*, -readability-avoid-nested-conditional-operator, diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp index 789327b196ab6..d7b8f7bc62409 100644 --- a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp @@ -76,9 +76,9 @@ void StructPackAlignCheck::check(const MatchFinder::MatchResult &Result) { CharUnits CurrSize = Result.Context->getASTRecordLayout(Struct).getSize(); CharUnits MinByteSize = CharUnits::fromQuantity(std::max( - ceil(static_cast(TotalBitSize) / CharSize), 1)); + std::ceil(static_cast(TotalBitSize) / CharSize), 1)); CharUnits MaxAlign = CharUnits::fromQuantity( - ceil((float)Struct->getMaxAlignment() / CharSize)); + std::ceil((float)Struct->getMaxAlignment() / CharSize)); CharUnits CurrAlign = Result.Context->getASTRecordLayout(Struct).getAlignment(); CharUnits NewAlign = computeRecommendedAlignment(MinByteSize); diff --git a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp index 0bb9d6e4a7cee..9846a573a8c29 100644 --- a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp @@ -208,18 +208,20 @@ bool 
UnrollLoopsCheck::hasLargeNumIterations(const Stmt *Statement, return true; switch (Op->getOpcode()) { case (BO_AddAssign): - Iterations = ceil(float(EndValue - InitValue) / ConstantValue); + Iterations = std::ceil(float(EndValue - InitValue) / ConstantValue); break; case (BO_SubAssign): - Iterations = ceil(float(InitValue - EndValue) / ConstantValue); + Iterations = std::ceil(float(InitValue - EndValue) / ConstantValue); break; case (BO_MulAssign): - Iterations = 1 + (log((double)EndValue) - log((double)InitValue)) / - log((double)ConstantValue); + Iterations = + 1 + (std::log((double)EndValue) - std::log((double)InitValue)) / + std::log((double)ConstantValue); break; case (BO_DivAssign): - Iterations = 1 + (log((double)InitValue) - log((double)EndValue)) / - log((double)ConstantValue); + Iterations = + 1 + (std::log((double)InitValue) - std::log((double)EndValue)) / + std::log((double)ConstantValue); break; default: // All other operators are not handled; assume large bounds. From fefe670be067cd698a71ab8dcd6a92803e2192ef Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 12 Sep 2025 07:12:09 +0200 Subject: [PATCH 074/734] [clang][bytecode] Compile the definition, not the most recent decl (#158093) --- clang/lib/AST/ByteCode/ByteCodeEmitter.cpp | 6 ++---- clang/lib/AST/ByteCode/Function.h | 7 ++++--- clang/lib/AST/ByteCode/Interp.cpp | 7 +++++-- clang/test/Modules/lambda-merge.cpp | 1 + 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp b/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp index 1d71708799518..274efccac79dc 100644 --- a/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp +++ b/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp @@ -24,15 +24,13 @@ void ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl, Function *Func) { assert(FuncDecl); assert(Func); + assert(FuncDecl->isThisDeclarationADefinition()); // Manually created functions that haven't been assigned proper // parameters yet. 
if (!FuncDecl->param_empty() && !FuncDecl->param_begin()) return; - if (!FuncDecl->isDefined()) - return; - // Set up lambda captures. if (const auto *MD = dyn_cast(FuncDecl); MD && isLambdaCallOperator(MD)) { @@ -87,7 +85,7 @@ void ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl, } // Set the function's code. - Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap), + Func->setCode(FuncDecl, NextLocalOffset, std::move(Code), std::move(SrcMap), std::move(Scopes), FuncDecl->hasBody()); Func->setIsFullyCompiled(true); } diff --git a/clang/lib/AST/ByteCode/Function.h b/clang/lib/AST/ByteCode/Function.h index af429b7849e88..95add5809afcc 100644 --- a/clang/lib/AST/ByteCode/Function.h +++ b/clang/lib/AST/ByteCode/Function.h @@ -236,9 +236,10 @@ class Function final { bool HasRVO, bool IsLambdaStaticInvoker); /// Sets the code of a function. - void setCode(unsigned NewFrameSize, llvm::SmallVector &&NewCode, - SourceMap &&NewSrcMap, llvm::SmallVector &&NewScopes, - bool NewHasBody) { + void setCode(FunctionDeclTy Source, unsigned NewFrameSize, + llvm::SmallVector &&NewCode, SourceMap &&NewSrcMap, + llvm::SmallVector &&NewScopes, bool NewHasBody) { + this->Source = Source; FrameSize = NewFrameSize; Code = std::move(NewCode); SrcMap = std::move(NewSrcMap); diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index b961a413fbe78..d5e75a0c90469 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1493,9 +1493,12 @@ bool CheckDestructor(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { } static void compileFunction(InterpState &S, const Function *Func) { + const FunctionDecl *Definition = Func->getDecl()->getDefinition(); + if (!Definition) + return; + Compiler(S.getContext(), S.P) - .compileFunc(Func->getDecl()->getMostRecentDecl(), - const_cast(Func)); + .compileFunc(Definition, const_cast(Func)); } bool CallVar(InterpState &S, CodePtr OpPC, const Function *Func, diff --git 
a/clang/test/Modules/lambda-merge.cpp b/clang/test/Modules/lambda-merge.cpp index e996c9c0d5d1f..6b61d356ec581 100644 --- a/clang/test/Modules/lambda-merge.cpp +++ b/clang/test/Modules/lambda-merge.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fmodules -std=c++17 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s +// RUN: %clang_cc1 -fmodules -std=c++17 -emit-llvm %s -o - -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter | FileCheck %s #pragma clang module build A module A {} From df2a7a9ec9afbcc2d1ab87cdcb55059fc2a0aaa8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 11 Sep 2025 22:16:11 -0700 Subject: [PATCH 075/734] [ADT] clang-format several files (NFC) I'm planning to modify these files. --- llvm/include/llvm/ADT/IndexedMap.h | 101 +++++++++++-------------- llvm/include/llvm/ADT/SparseMultiSet.h | 39 ++++------ llvm/include/llvm/ADT/SparseSet.h | 32 +++----- llvm/include/llvm/ADT/identity.h | 11 +-- 4 files changed, 74 insertions(+), 109 deletions(-) diff --git a/llvm/include/llvm/ADT/IndexedMap.h b/llvm/include/llvm/ADT/IndexedMap.h index b1ebbdd1bfd54..cda0316dc78fa 100644 --- a/llvm/include/llvm/ADT/IndexedMap.h +++ b/llvm/include/llvm/ADT/IndexedMap.h @@ -20,67 +20,56 @@ #ifndef LLVM_ADT_INDEXEDMAP_H #define LLVM_ADT_INDEXEDMAP_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/identity.h" #include namespace llvm { -template > - class IndexedMap { - using IndexT = typename ToIndexT::argument_type; - // Prefer SmallVector with zero inline storage over std::vector. IndexedMaps - // can grow very large and SmallVector grows more efficiently as long as T - // is trivially copyable. 
- using StorageT = SmallVector; - - StorageT storage_; - T nullVal_; - ToIndexT toIndex_; - - public: - IndexedMap() : nullVal_(T()) {} - - explicit IndexedMap(const T& val) : nullVal_(val) {} - - typename StorageT::reference operator[](IndexT n) { - assert(toIndex_(n) < storage_.size() && "index out of bounds!"); - return storage_[toIndex_(n)]; - } - - typename StorageT::const_reference operator[](IndexT n) const { - assert(toIndex_(n) < storage_.size() && "index out of bounds!"); - return storage_[toIndex_(n)]; - } - - void reserve(typename StorageT::size_type s) { - storage_.reserve(s); - } - - void resize(typename StorageT::size_type s) { - storage_.resize(s, nullVal_); - } - - void clear() { - storage_.clear(); - } - - void grow(IndexT n) { - unsigned NewSize = toIndex_(n) + 1; - if (NewSize > storage_.size()) - resize(NewSize); - } - - bool inBounds(IndexT n) const { - return toIndex_(n) < storage_.size(); - } - - typename StorageT::size_type size() const { - return storage_.size(); - } - }; - -} // end namespace llvm +template > class IndexedMap { + using IndexT = typename ToIndexT::argument_type; + // Prefer SmallVector with zero inline storage over std::vector. IndexedMaps + // can grow very large and SmallVector grows more efficiently as long as T + // is trivially copyable. 
+ using StorageT = SmallVector; + + StorageT storage_; + T nullVal_; + ToIndexT toIndex_; + +public: + IndexedMap() : nullVal_(T()) {} + + explicit IndexedMap(const T &val) : nullVal_(val) {} + + typename StorageT::reference operator[](IndexT n) { + assert(toIndex_(n) < storage_.size() && "index out of bounds!"); + return storage_[toIndex_(n)]; + } + + typename StorageT::const_reference operator[](IndexT n) const { + assert(toIndex_(n) < storage_.size() && "index out of bounds!"); + return storage_[toIndex_(n)]; + } + + void reserve(typename StorageT::size_type s) { storage_.reserve(s); } + + void resize(typename StorageT::size_type s) { storage_.resize(s, nullVal_); } + + void clear() { storage_.clear(); } + + void grow(IndexT n) { + unsigned NewSize = toIndex_(n) + 1; + if (NewSize > storage_.size()) + resize(NewSize); + } + + bool inBounds(IndexT n) const { return toIndex_(n) < storage_.size(); } + + typename StorageT::size_type size() const { return storage_.size(); } +}; + +} // namespace llvm #endif // LLVM_ADT_INDEXEDMAP_H diff --git a/llvm/include/llvm/ADT/SparseMultiSet.h b/llvm/include/llvm/ADT/SparseMultiSet.h index d8dbe4023ea64..cf7603158b28b 100644 --- a/llvm/include/llvm/ADT/SparseMultiSet.h +++ b/llvm/include/llvm/ADT/SparseMultiSet.h @@ -21,9 +21,9 @@ #ifndef LLVM_ADT_SPARSEMULTISET_H #define LLVM_ADT_SPARSEMULTISET_H -#include "llvm/ADT/identity.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/identity.h" #include #include #include @@ -80,9 +80,8 @@ namespace llvm { /// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT. /// @tparam SparseT An unsigned integer type. See above. 
/// -template, - typename SparseT = uint8_t> +template , + typename SparseT = uint8_t> class SparseMultiSet { static_assert(std::is_unsigned_v, "SparseT must be an unsigned integer type"); @@ -103,14 +102,10 @@ class SparseMultiSet { SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) {} /// List tails have invalid Nexts. - bool isTail() const { - return Next == INVALID; - } + bool isTail() const { return Next == INVALID; } /// Whether this node is a tombstone node, and thus is in our freelist. - bool isTombstone() const { - return Prev == INVALID; - } + bool isTombstone() const { return Prev == INVALID; } /// Since the list is circular in Prev, all non-tombstone nodes have a valid /// Prev. @@ -156,7 +151,7 @@ class SparseMultiSet { /// Add in the given SMSNode. Uses a free entry in our freelist if /// available. Returns the index of the added node. - unsigned addValue(const ValueT& V, unsigned Prev, unsigned Next) { + unsigned addValue(const ValueT &V, unsigned Prev, unsigned Next) { if (NumFree == 0) { Dense.push_back(SMSNode(V, Prev, Next)); return Dense.size() - 1; @@ -204,13 +199,13 @@ class SparseMultiSet { // seem like a likely use case, so we can add that code when we need it. assert(empty() && "Can only resize universe on an empty map"); // Hysteresis prevents needless reallocations. - if (U >= Universe/4 && U <= Universe) + if (U >= Universe / 4 && U <= Universe) return; free(Sparse); // The Sparse array doesn't actually need to be initialized, so malloc // would be enough here, but that will cause tools like valgrind to // complain about branching on uninitialized data. 
- Sparse = static_cast(safe_calloc(U, sizeof(SparseT))); + Sparse = static_cast(safe_calloc(U, sizeof(SparseT))); Universe = U; } @@ -232,7 +227,7 @@ class SparseMultiSet { unsigned SparseIdx; iterator_base(SMSPtrTy P, unsigned I, unsigned SI) - : SMS(P), Idx(I), SparseIdx(SI) {} + : SMS(P), Idx(I), SparseIdx(SI) {} /// Whether our iterator has fallen outside our dense vector. bool isEnd() const { @@ -273,9 +268,7 @@ class SparseMultiSet { return false; } - bool operator!=(const iterator_base &RHS) const { - return !operator==(RHS); - } + bool operator!=(const iterator_base &RHS) const { return !operator==(RHS); } /// Increment and decrement operators iterator_base &operator--() { // predecrement - Back up @@ -372,12 +365,10 @@ class SparseMultiSet { /// @param Key A valid key to find. /// @returns An iterator to the element identified by key, or end(). /// - iterator find(const KeyT &Key) { - return findIndex(KeyIndexOf(Key)); - } + iterator find(const KeyT &Key) { return findIndex(KeyIndexOf(Key)); } const_iterator find(const KeyT &Key) const { - iterator I = const_cast(this)->findIndex(KeyIndexOf(Key)); + iterator I = const_cast(this)->findIndex(KeyIndexOf(Key)); return const_iterator(I.SMS, I.Idx, KeyIndexOf(Key)); } @@ -392,9 +383,7 @@ class SparseMultiSet { } /// Returns true if this set contains an element identified by Key. - bool contains(const KeyT &Key) const { - return find(Key) != end(); - } + bool contains(const KeyT &Key) const { return find(Key) != end(); } /// Return the head and tail of the subset's list, otherwise returns end(). 
iterator getHead(const KeyT &Key) { return find(Key); } @@ -517,6 +506,6 @@ class SparseMultiSet { } }; -} // end namespace llvm +} // namespace llvm #endif // LLVM_ADT_SPARSEMULTISET_H diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h index a8ebc9f786486..395cfc3ebfd43 100644 --- a/llvm/include/llvm/ADT/SparseSet.h +++ b/llvm/include/llvm/ADT/SparseSet.h @@ -20,8 +20,8 @@ #ifndef LLVM_ADT_SPARSESET_H #define LLVM_ADT_SPARSESET_H -#include "llvm/ADT/identity.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/identity.h" #include "llvm/Support/AllocatorBase.h" #include #include @@ -53,8 +53,7 @@ namespace llvm { /// /// For best results, ValueT should not require a destructor. /// -template -struct SparseSetValTraits { +template struct SparseSetValTraits { static unsigned getValIndex(const ValueT &Val) { return Val.getSparseSetIndex(); } @@ -64,7 +63,7 @@ struct SparseSetValTraits { /// generic implementation handles ValueT classes which either provide /// getSparseSetIndex() or specialize SparseSetValTraits<>. /// -template +template struct SparseSetValFunctor { unsigned operator()(const ValueT &Val) const { return SparseSetValTraits::getValIndex(Val); @@ -73,11 +72,9 @@ struct SparseSetValFunctor { /// SparseSetValFunctor - Helper class for the common case of /// identity key/value sets. -template +template struct SparseSetValFunctor { - unsigned operator()(const KeyT &Key) const { - return KeyFunctorT()(Key); - } + unsigned operator()(const KeyT &Key) const { return KeyFunctorT()(Key); } }; /// SparseSet - Fast set implementation for objects that can be identified by @@ -118,9 +115,8 @@ struct SparseSetValFunctor { /// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT. /// @tparam SparseT An unsigned integer type. See above. 
/// -template, - typename SparseT = uint8_t> +template , + typename SparseT = uint8_t> class SparseSet { static_assert(std::is_unsigned_v, "SparseT must be an unsigned integer type"); @@ -162,7 +158,7 @@ class SparseSet { // seem like a likely use case, so we can add that code when we need it. assert(empty() && "Can only resize universe on an empty map"); // Hysteresis prevents needless reallocations. - if (U >= Universe/4 && U <= Universe) + if (U >= Universe / 4 && U <= Universe) return; // The Sparse array doesn't actually need to be initialized, so malloc // would be enough here, but that will cause tools like valgrind to @@ -226,12 +222,10 @@ class SparseSet { /// @param Key A valid key to find. /// @returns An iterator to the element identified by key, or end(). /// - iterator find(const KeyT &Key) { - return findIndex(KeyIndexOf(Key)); - } + iterator find(const KeyT &Key) { return findIndex(KeyIndexOf(Key)); } const_iterator find(const KeyT &Key) const { - return const_cast(this)->findIndex(KeyIndexOf(Key)); + return const_cast(this)->findIndex(KeyIndexOf(Key)); } /// Check if the set contains the given \c Key. @@ -267,9 +261,7 @@ class SparseSet { /// array subscript - If an element already exists with this key, return it. /// Otherwise, automatically construct a new value from Key, insert it, /// and return the newly inserted element. - ValueT &operator[](const KeyT &Key) { - return *insert(ValueT(Key)).first; - } + ValueT &operator[](const KeyT &Key) { return *insert(ValueT(Key)).first; } ValueT pop_back_val() { // Sparse does not need to be cleared, see find(). 
@@ -318,6 +310,6 @@ class SparseSet { } }; -} // end namespace llvm +} // namespace llvm #endif // LLVM_ADT_SPARSESET_H diff --git a/llvm/include/llvm/ADT/identity.h b/llvm/include/llvm/ADT/identity.h index 7309032362077..88d033fc01141 100644 --- a/llvm/include/llvm/ADT/identity.h +++ b/llvm/include/llvm/ADT/identity.h @@ -15,7 +15,6 @@ #ifndef LLVM_ADT_IDENTITY_H #define LLVM_ADT_IDENTITY_H - namespace llvm { // Similar to `std::identity` from C++20. @@ -23,14 +22,10 @@ template struct identity { using is_transparent = void; using argument_type = Ty; - Ty &operator()(Ty &self) const { - return self; - } - const Ty &operator()(const Ty &self) const { - return self; - } + Ty &operator()(Ty &self) const { return self; } + const Ty &operator()(const Ty &self) const { return self; } }; -} // end namespace llvm +} // namespace llvm #endif // LLVM_ADT_IDENTITY_H From 13daa1e6efdbdc322265fda7ad8f5b265d2ab4aa Mon Sep 17 00:00:00 2001 From: Roy Shi Date: Thu, 11 Sep 2025 22:17:21 -0700 Subject: [PATCH 076/734] [lldb-dap] Add `debugAdapterEnv` for `attach` requests & improve regex (#157980) # Changes #153536 added a new debug configuration field called `debugAdapterEnv` and enabled it in `launch.json` **for `launch` requests**. This patch enables the same for **`attach` requests**. This patch also improves the regex used in this field, i.e. shortens it and fixes the double backslashes (`\\`) in `debug-adapter-factory.ts` (note: the ones in `package.json` need the double backslashes). 
# Test Manually tested the following values in `attach` requests (so that we are testing both changes at the same time): ``` // Accepted "debugAdapterEnv": [ "AAA=BBB", ], "debugAdapterEnv": [ "AAA=", ], "debugAdapterEnv": [ "AAA", ], // Rejected "debugAdapterEnv": [ "=AAA", ], "debugAdapterEnv": [ "=", ], "debugAdapterEnv": [ "", ], ``` --- lldb/tools/lldb-dap/package.json | 29 +++++++++++++++++-- .../lldb-dap/src-ts/debug-adapter-factory.ts | 2 +- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 9cc653cee405b..0290a5f18f800 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -409,7 +409,7 @@ "anyOf": [ { "type": "object", - "markdownDescription": "Additional environment variables to set when launching the debug adapter executable. E.g. `{ \"FOO\": \"1\" }`", + "markdownDescription": "Additional environment variables to set when launching the debug adapter executable. For example `{ \"FOO\": \"1\" }`", "patternProperties": { ".*": { "type": "string" @@ -419,10 +419,10 @@ }, { "type": "array", - "markdownDescription": "Additional environment variables to set when launching the debug adapter executable. E.g. `[\"FOO=1\", \"BAR\"]`", + "markdownDescription": "Additional environment variables to set when launching the debug adapter executable. For example `[\"FOO=1\", \"BAR\"]`", "items": { "type": "string", - "pattern": "^((\\w+=.*)|^\\w+)$" + "pattern": "^\\w+(=.*)?$" }, "default": [] } @@ -672,6 +672,29 @@ }, "markdownDescription": "The list of additional arguments used to launch the debug adapter executable. Overrides any user or workspace settings." }, + "debugAdapterEnv": { + "anyOf": [ + { + "type": "object", + "markdownDescription": "Additional environment variables to set when launching the debug adapter executable. 
For example `{ \"FOO\": \"1\" }`", + "patternProperties": { + ".*": { + "type": "string" + } + }, + "default": {} + }, + { + "type": "array", + "markdownDescription": "Additional environment variables to set when launching the debug adapter executable. For example `[\"FOO=1\", \"BAR\"]`", + "items": { + "type": "string", + "pattern": "^\\w+(=.*)?$" + }, + "default": [] + } + ] + }, "program": { "type": "string", "description": "Path to the program to attach to." diff --git a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts index f7e92ee95ca32..7060638a94864 100644 --- a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts +++ b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts @@ -92,7 +92,7 @@ function validateDAPEnv(debugConfigEnv: any): boolean { Array.isArray(debugConfigEnv) && debugConfigEnv.findIndex( (entry) => - typeof entry !== "string" || !/^((\\w+=.*)|^\\w+)$/.test(entry), + typeof entry !== "string" || !/^\w+(=.*)?$/.test(entry), ) !== -1 ) { return false; From 586c0ad918aa3e725224246260b458b4b90615c2 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Fri, 12 Sep 2025 07:03:49 +0100 Subject: [PATCH 077/734] [WebAssembly] Support partial-reduce accumulator (#158060) We currently only support partial.reduce.add in the case where we are performing a multiply-accumulate. Now add support for any partial reduction where the input is being extended, where we can take advantage of extadd_pairwise. 
--- .../lib/Target/WebAssembly/WebAssemblyISD.def | 1 + .../WebAssembly/WebAssemblyISelLowering.cpp | 180 +++--- .../WebAssembly/WebAssemblyInstrSIMD.td | 9 +- .../WebAssemblyTargetTransformInfo.cpp | 27 +- .../WebAssembly/partial-reduce-accumulate.ll | 609 ++++++++++++++++++ 5 files changed, 736 insertions(+), 90 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index 1eae3586d16b8..23108e429eda8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -28,6 +28,7 @@ HANDLE_NODETYPE(BR_IF) HANDLE_NODETYPE(BR_TABLE) HANDLE_NODETYPE(DOT) HANDLE_NODETYPE(EXT_ADD_PAIRWISE_U) +HANDLE_NODETYPE(EXT_ADD_PAIRWISE_S) HANDLE_NODETYPE(SHUFFLE) HANDLE_NODETYPE(SWIZZLE) HANDLE_NODETYPE(VEC_SHL) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index fe100dab427ef..aea27ba32d37e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -422,24 +422,30 @@ bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic( return true; EVT VT = EVT::getEVT(I->getType()); + if (VT.getSizeInBits() > 128) + return true; + auto Op1 = I->getOperand(1); if (auto *InputInst = dyn_cast(Op1)) { - if (InstructionOpcodeToISD(InputInst->getOpcode()) != ISD::MUL) - return true; - - if (isa(InputInst->getOperand(0)) && - isa(InputInst->getOperand(1))) { - // dot only supports signed inputs but also support lowering unsigned. 
- if (cast(InputInst->getOperand(0))->getOpcode() != - cast(InputInst->getOperand(1))->getOpcode()) - return true; - - EVT Op1VT = EVT::getEVT(Op1->getType()); - if (Op1VT.getVectorElementType() == VT.getVectorElementType() && - ((VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()) || - (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount()))) - return false; + unsigned Opcode = InstructionOpcodeToISD(InputInst->getOpcode()); + if (Opcode == ISD::MUL) { + if (isa(InputInst->getOperand(0)) && + isa(InputInst->getOperand(1))) { + // dot only supports signed inputs but also support lowering unsigned. + if (cast(InputInst->getOperand(0))->getOpcode() != + cast(InputInst->getOperand(1))->getOpcode()) + return true; + + EVT Op1VT = EVT::getEVT(Op1->getType()); + if (Op1VT.getVectorElementType() == VT.getVectorElementType() && + ((VT.getVectorElementCount() * 2 == + Op1VT.getVectorElementCount()) || + (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount()))) + return false; + } + } else if (ISD::isExtOpcode(Opcode)) { + return false; } } return true; @@ -2117,77 +2123,93 @@ SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) { assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32"); SDLoc DL(N); - SDValue Mul = N->getOperand(2); - assert(Mul->getOpcode() == ISD::MUL && "expected mul input"); - - SDValue ExtendLHS = Mul->getOperand(0); - SDValue ExtendRHS = Mul->getOperand(1); - assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) && - ISD::isExtOpcode(ExtendRHS.getOpcode())) && - "expected widening mul"); - assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() && - "expected mul to use the same extend for both operands"); - - SDValue ExtendInLHS = ExtendLHS->getOperand(0); - SDValue ExtendInRHS = ExtendRHS->getOperand(0); - bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND; - - if (ExtendInLHS->getValueType(0) == MVT::v8i16) { - if (IsSigned) { - // i32x4.dot_i16x8_s - SDValue Dot = 
DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, - ExtendInLHS, ExtendInRHS); - return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot); - } - unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U; - unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U; + SDValue Input = N->getOperand(2); + if (Input->getOpcode() == ISD::MUL) { + SDValue ExtendLHS = Input->getOperand(0); + SDValue ExtendRHS = Input->getOperand(1); + assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) && + ISD::isExtOpcode(ExtendRHS.getOpcode())) && + "expected widening mul or add"); + assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() && + "expected binop to use the same extend for both operands"); + + SDValue ExtendInLHS = ExtendLHS->getOperand(0); + SDValue ExtendInRHS = ExtendRHS->getOperand(0); + bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND; + unsigned LowOpc = + IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U; + unsigned HighOpc = IsSigned ? WebAssemblyISD::EXTEND_HIGH_S + : WebAssemblyISD::EXTEND_HIGH_U; + SDValue LowLHS; + SDValue LowRHS; + SDValue HighLHS; + SDValue HighRHS; + + auto AssignInputs = [&](MVT VT) { + LowLHS = DAG.getNode(LowOpc, DL, VT, ExtendInLHS); + LowRHS = DAG.getNode(LowOpc, DL, VT, ExtendInRHS); + HighLHS = DAG.getNode(HighOpc, DL, VT, ExtendInLHS); + HighRHS = DAG.getNode(HighOpc, DL, VT, ExtendInRHS); + }; - // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs))) - SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInLHS); - SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInRHS); - SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInLHS); - SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInRHS); + if (ExtendInLHS->getValueType(0) == MVT::v8i16) { + if (IsSigned) { + // i32x4.dot_i16x8_s + SDValue Dot = DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, + ExtendInLHS, ExtendInRHS); + return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot); + } - SDValue MulLow = 
DAG.getNode(ISD::MUL, DL, MVT::v4i32, LowLHS, LowRHS); - SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v4i32, HighLHS, HighRHS); - SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, MulLow, MulHigh); - return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); + // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs))) + MVT VT = MVT::v4i32; + AssignInputs(VT); + SDValue MulLow = DAG.getNode(ISD::MUL, DL, VT, LowLHS, LowRHS); + SDValue MulHigh = DAG.getNode(ISD::MUL, DL, VT, HighLHS, HighRHS); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, MulLow, MulHigh); + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(1), Add); + } else { + assert(ExtendInLHS->getValueType(0) == MVT::v16i8 && + "expected v16i8 input types"); + AssignInputs(MVT::v8i16); + // Lower to a wider tree, using twice the operations compared to above. + if (IsSigned) { + // Use two dots + SDValue DotLHS = + DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS); + SDValue DotRHS = + DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS); + SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS); + return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); + } + + SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS); + SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS); + + SDValue AddLow = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, + MVT::v4i32, MulLow); + SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, + MVT::v4i32, MulHigh); + SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh); + return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); + } } else { - assert(ExtendInLHS->getValueType(0) == MVT::v16i8 && - "expected v16i8 input types"); - // Lower to a wider tree, using twice the operations compared to above. 
- if (IsSigned) { - // Use two dots - unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_S; - unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_S; - SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS); - SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS); - SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS); - SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS); - SDValue DotLHS = - DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS); - SDValue DotRHS = - DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS); - SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS); + // Accumulate the input using extadd_pairwise. + assert(ISD::isExtOpcode(Input.getOpcode()) && "expected extend"); + bool IsSigned = Input->getOpcode() == ISD::SIGN_EXTEND; + unsigned PairwiseOpc = IsSigned ? WebAssemblyISD::EXT_ADD_PAIRWISE_S + : WebAssemblyISD::EXT_ADD_PAIRWISE_U; + SDValue ExtendIn = Input->getOperand(0); + if (ExtendIn->getValueType(0) == MVT::v8i16) { + SDValue Add = DAG.getNode(PairwiseOpc, DL, MVT::v4i32, ExtendIn); return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); } - unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U; - unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U; - SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS); - SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS); - SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS); - SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS); - - SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS); - SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS); - - SDValue AddLow = - DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, MVT::v4i32, MulLow); - SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, - MVT::v4i32, MulHigh); - SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh); + 
assert(ExtendIn->getValueType(0) == MVT::v16i8 && + "expected v16i8 input types"); + SDValue Add = + DAG.getNode(PairwiseOpc, DL, MVT::v4i32, + DAG.getNode(PairwiseOpc, DL, MVT::v8i16, ExtendIn)); return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 3c26b453c4482..d8948ad2df037 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1454,12 +1454,13 @@ def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>; // Extended pairwise addition def extadd_pairwise_u : SDNode<"WebAssemblyISD::EXT_ADD_PAIRWISE_U", extend_t>; +def extadd_pairwise_s : SDNode<"WebAssemblyISD::EXT_ADD_PAIRWISE_S", extend_t>; -defm "" : SIMDConvert; defm "" : SIMDConvert; -defm "" : SIMDConvert; defm "" : SIMDConvert; @@ -1468,6 +1469,10 @@ def : Pat<(v4i32 (int_wasm_extadd_pairwise_unsigned (v8i16 V128:$in))), (extadd_pairwise_u_I32x4 V128:$in)>; def : Pat<(v8i16 (int_wasm_extadd_pairwise_unsigned (v16i8 V128:$in))), (extadd_pairwise_u_I16x8 V128:$in)>; +def : Pat<(v4i32 (int_wasm_extadd_pairwise_signed (v8i16 V128:$in))), + (extadd_pairwise_s_I32x4 V128:$in)>; +def : Pat<(v8i16 (int_wasm_extadd_pairwise_signed (v16i8 V128:$in))), + (extadd_pairwise_s_I16x8 V128:$in)>; // f64x2 <-> f32x4 conversions def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 0eefd3e2b3500..92a9812df2127 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -316,7 +316,13 @@ InstructionCost WebAssemblyTTIImpl::getPartialReductionCost( if (CostKind != TTI::TCK_RecipThroughput) return Invalid; - InstructionCost Cost(TTI::TCC_Basic); + if (Opcode != 
Instruction::Add) + return Invalid; + + EVT AccumEVT = EVT::getEVT(AccumType); + // TODO: Add i64 accumulator. + if (AccumEVT != MVT::i32) + return Invalid; // Possible options: // - i16x8.extadd_pairwise_i8x16_sx @@ -324,23 +330,26 @@ InstructionCost WebAssemblyTTIImpl::getPartialReductionCost( // - i32x4.dot_i16x8_s // Only try to support dot, for now. - if (Opcode != Instruction::Add) + EVT InputEVT = EVT::getEVT(InputTypeA); + if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) || + (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) { return Invalid; + } - if (!BinOp || *BinOp != Instruction::Mul) + if (OpAExtend == TTI::PR_None) return Invalid; - if (InputTypeA != InputTypeB) - return Invalid; + InstructionCost Cost(TTI::TCC_Basic); + if (!BinOp) + return Cost; if (OpAExtend != OpBExtend) return Invalid; - EVT InputEVT = EVT::getEVT(InputTypeA); - EVT AccumEVT = EVT::getEVT(AccumType); + if (*BinOp != Instruction::Mul) + return Invalid; - // TODO: Add i64 accumulator. - if (AccumEVT != MVT::i32) + if (InputTypeA != InputTypeB) return Invalid; // Signed inputs can lower to dot diff --git a/llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll b/llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll new file mode 100644 index 0000000000000..47ea762864cc2 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/partial-reduce-accumulate.ll @@ -0,0 +1,609 @@ +; RUN: opt -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -verify-machineinstrs -o - | FileCheck %s +; RUN: opt -mattr=+simd128 -passes=loop-vectorize -vectorizer-maximize-bandwidth %s | llc -mtriple=wasm32 -mattr=+simd128 -verify-machineinstrs -o - | FileCheck %s --check-prefix=MAX-BANDWIDTH + +target triple = "wasm32" + +define hidden i32 @accumulate_add_u8_u8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_add_u8_u8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_u +; CHECK: 
i32x4.extend_low_i16x8_u +; CHECK: i32x4.add +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_u +; CHECK: i32x4.extend_low_i16x8_u +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add + +entry: + %cmp8.not = icmp eq i32 %N, 0 + br i1 %cmp8.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010 + %0 = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.010 + %1 = load i8, ptr %arrayidx1, align 1 + %conv2 = zext i8 %1 to i32 + %add = add i32 %result.09, %conv + %add3 = add i32 %add, %conv2 + %inc = add nuw i32 %i.010, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_add_s8_s8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_add_s8_s8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: 
i16x8.extadd_pairwise_i8x16_s +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp8.not = icmp eq i32 %N, 0 + br i1 %cmp8.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.010 + %1 = load i8, ptr %arrayidx1, align 1 + %conv2 = sext i8 %1 to i32 + %add = add i32 %result.09, %conv + %add3 = add i32 %add, %conv2 + %inc = add nuw i32 %i.010, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_add_s8_u8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_add_s8_u8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_u +; CHECK: i32x4.extend_low_i16x8_u +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp8.not = icmp eq i32 %N, 0 + br i1 %cmp8.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ 
%inc, %for.body ], [ 0, %entry ] + %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.010 + %1 = load i8, ptr %arrayidx1, align 1 + %conv2 = zext i8 %1 to i32 + %add = add i32 %result.09, %conv + %add3 = add i32 %add, %conv2 + %inc = add nuw i32 %i.010, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_add_s8_s16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_add_s8_s16: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add +; CHECK: i32x4.load16x4_s +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s +; MAX-BANDWIDTH: i32x4.add +entry: + 
%cmp8.not = icmp eq i32 %N, 0 + br i1 %cmp8.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.010 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.010 + %1 = load i16, ptr %arrayidx1, align 2 + %conv2 = sext i16 %1 to i32 + %add = add i32 %result.09, %conv + %add3 = add i32 %add, %conv2 + %inc = add nuw i32 %i.010, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_shr_u8(ptr noundef readonly %a, i32 noundef %N) { +; CHECK-LABEL: accumulate_shr_u8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i8x16.shr_u +; CHECK: i16x8.extend_low_i8x16_u +; CHECK: i32x4.extend_low_i16x8_u +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.shr_u +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp4.not = icmp eq i32 %N, 0 + br i1 %cmp4.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.06 + %0 = load i8, ptr %arrayidx, align 1 + %1 = lshr i8 %0, 1 + %shr = zext nneg i8 %1 to i32 + %add = add i32 %result.05, %shr + %inc = add nuw i32 %i.06, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 
%exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_shr_s8(ptr noundef readonly %a, i32 noundef %N) { +; CHECK-LABEL: accumulate_shr_s8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i8x16.shr_s +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.shr_s +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp4.not = icmp eq i32 %N, 0 + br i1 %cmp4.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.06 + %0 = load i8, ptr %arrayidx, align 1 + %1 = ashr i8 %0, 1 + %shr = sext i8 %1 to i32 + %add = add nsw i32 %result.05, %shr + %inc = add nuw i32 %i.06, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_max_u8_u8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_max_u8_u8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: v128.load32_zero +; CHECK: i8x16.max_u +; CHECK: i16x8.extend_low_i8x16_u +; CHECK: i32x4.extend_low_i16x8_u +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.max_u +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_u +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp17.not = icmp eq i32 %N, 0 + br i1 %cmp17.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 
0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.019 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.018 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.019 + %0 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.019 + %1 = load i8, ptr %arrayidx1, align 1 + %. = tail call i8 @llvm.umax.i8(i8 %0, i8 %1) + %cond = zext i8 %. to i32 + %add = add i32 %result.018, %cond + %inc = add nuw i32 %i.019, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_min_s8_s8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_min_s8_s8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: v128.load32_zero +; CHECK: i8x16.min_s +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.min_s +; MAX-BANDWIDTH: i16x8.extadd_pairwise_i8x16_s +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp17.not = icmp eq i32 %N, 0 + br i1 %cmp17.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.019 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.018 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.019 + %0 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.019 + %1 = load i8, ptr %arrayidx1, align 1 + %. = tail call i8 @llvm.smin.i8(i8 %0, i8 %1) + %cond = sext i8 %. 
to i32 + %add = add nsw i32 %result.018, %cond + %inc = add nuw i32 %i.019, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_add_u16_u16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_add_u16_u16: +; CHECK: loop +; CHECK: i32x4.load16x4_u +; CHECK: i32x4.add +; CHECK: i32x4.load16x4_u +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp8.not = icmp eq i32 %N, 0 + br i1 %cmp8.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.010 + %0 = load i16, ptr %arrayidx, align 2 + %conv = zext i16 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.010 + %1 = load i16, ptr %arrayidx1, align 2 + %conv2 = zext i16 %1 to i32 + %add = add i32 %result.09, %conv + %add3 = add i32 %add, %conv2 + %inc = add nuw i32 %i.010, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_add_s16_s16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_add_s16_s16: +; CHECK: loop +; CHECK: i32x4.load16x4_s +; CHECK: i32x4.add +; CHECK: i32x4.load16x4_s +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: 
i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp8.not = icmp eq i32 %N, 0 + br i1 %cmp8.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.09 = phi i32 [ %add3, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.010 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sext i16 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.010 + %1 = load i16, ptr %arrayidx1, align 2 + %conv2 = sext i16 %1 to i32 + %add = add i32 %result.09, %conv + %add3 = add i32 %add, %conv2 + %inc = add nuw i32 %i.010, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_shr_u16(ptr noundef readonly %a, i32 noundef %N) { +; CHECK-LABEL: accumulate_shr_u16: +; CHECK: loop +; CHECK: v128.load64_zero +; CHECK: i16x8.shr_u +; CHECK: i32x4.extend_low_i16x8_u +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.shr_u +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp4.not = icmp eq i32 %N, 0 + br i1 %cmp4.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.06 + %0 = load i16, ptr %arrayidx, align 2 + %1 = lshr i16 %0, 1 + %shr = zext nneg i16 %1 to i32 + %add = add i32 %result.05, %shr + %inc = add nuw i32 %i.06, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, 
label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_shr_s16(ptr noundef readonly %a, i32 noundef %N) { +; CHECK-LABEL: accumulate_shr_s16: +; CHECK: loop +; CHECK: v128.load64_zero +; CHECK: i16x8.shr_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i16x8.shr_s +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s +; MAX-BANDWIDTH: i32x4.add +entry: + %cmp4.not = icmp eq i32 %N, 0 + br i1 %cmp4.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.06 + %0 = load i16, ptr %arrayidx, align 2 + %1 = ashr i16 %0, 1 + %shr = sext i16 %1 to i32 + %add = add nsw i32 %result.05, %shr + %inc = add nuw i32 %i.06, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_sub_s8_s8(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_sub_s8_s8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.add +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.sub + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: 
i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.sub +; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.sub +; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.sub +entry: + %cmp7.not = icmp eq i32 %N, 0 + br i1 %cmp7.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i8, ptr %a, i32 %i.09 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i8, ptr %b, i32 %i.09 + %1 = load i8, ptr %arrayidx1, align 1 + %conv2 = sext i8 %1 to i32 + %sub = add i32 %result.08, %conv + %add = sub i32 %sub, %conv2 + %inc = add nuw i32 %i.09, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define hidden i32 @accumulate_sub_s16_s16(ptr noundef readonly %a, ptr noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: accumulate_sub_s16_s16: +; CHECK: loop +; CHECK: i32x4.load16x4_s +; CHECK: i32x4.add +; CHECK: i32x4.load16x4_s +; CHECK: i32x4.sub + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load 
+; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s +; MAX-BANDWIDTH: i32x4.sub +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.sub +entry: + %cmp7.not = icmp eq i32 %N, 0 + br i1 %cmp7.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %result.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds nuw i16, ptr %a, i32 %i.09 + %0 = load i16, ptr %arrayidx, align 2 + %conv = sext i16 %0 to i32 + %arrayidx1 = getelementptr inbounds nuw i16, ptr %b, i32 %i.09 + %1 = load i16, ptr %arrayidx1, align 2 + %conv2 = sext i16 %1 to i32 + %sub = add i32 %result.08, %conv + %add = sub i32 %sub, %conv2 + %inc = add nuw i32 %i.09, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +declare i8 @llvm.umax.i8(i8, i8) + +declare i8 @llvm.smin.i8(i8, i8) From 73cfd45e3401b18730e283b8a13e21fb85de3a38 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 12 Sep 2025 08:04:44 +0200 Subject: [PATCH 078/734] [Clang] Set the FTM for trivial relocation (#142936) The language of side seems fairly stable. Setting the feature test macro will ease implementation in standard libraries. 
--- clang/lib/Frontend/InitPreprocessor.cpp | 2 +- clang/test/Lexer/cxx-features.cpp | 4 ++++ clang/www/cxx_status.html | 7 +------ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index e65c8b4f6facf..edf0a091e087c 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -766,7 +766,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_pack_indexing", "202311L"); Builder.defineMacro("__cpp_deleted_function", "202403L"); Builder.defineMacro("__cpp_variadic_friend", "202403L"); - // Builder.defineMacro("__cpp_trivial_relocatability", "202502L"); + Builder.defineMacro("__cpp_trivial_relocatability", "202502L"); if (LangOpts.Char8) Builder.defineMacro("__cpp_char8_t", "202207L"); diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index 8c1867d5c7365..ced5bcaf0db16 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -49,6 +49,10 @@ #error "wrong value for __cpp_placeholder_variables" #endif +#if check(trivial_relocatability, 202502, 202502, 202502, 202502, 202502, 202502, 202502) +#error "wrong value for __cpp_trivial_relocatability" +#endif + // --- C++23 features --- #if check(auto_cast, 0, 0, 0, 0, 0, 202110, 202110) diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index bb7144b827c3c..25940cc2899c1 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -280,12 +280,7 @@

C++2c implementation status

Trivial Relocatability P2786R13 - -
- Clang 21 (Partial) - The feature test macro (__cpp_trivial_relocatability) has not yet been set. -
- + Clang 21
#embed
From 4bb250d6a3d63c41f5d539c9b9a162070ea5b619 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 12 Sep 2025 14:21:54 +0800 Subject: [PATCH 079/734] [VPlan] Always consider register pressure on RISC-V (#156951) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stacked on #156923 In https://godbolt.org/z/8svWaredK, we spill a lot on RISC-V because whilst the largest element type is i8, we generate a bunch of pointer vectors for gathers and scatters. This means the VF chosen is quite high e.g. , but we end up using a bunch of m8 registers for the pointers. This was briefly fixed by #132190 where we computed register pressure in VPlan and used it to prune VFs that were likely to spill. The legacy cost model wasn't able to do this pruning because it didn't have visibility into the pointer vectors that were needed for the gathers/scatters. However VF pruning was restricted again to just the case when max bandwidth was enabled in #141736 to avoid an AArch64 regression, and restricted again in #149056 to only prune VFs that had max bandwidth enabled. On RISC-V we take advantage of register grouping for performance and choose a default of LMUL 2, which means there are 16 registers to work with – half the number as SVE, so we encounter higher register pressure more frequently. As such, we likely want to always consider pruning VFs with high register pressure and not just the VFs from max bandwidth. This adds a TTI hook to opt into this behaviour for RISC-V which fixes the motivating godbolt example above. When last checked this significantly reduces the number of spills on SPEC CPU 2017, up to 80% on 538.imagick_r. 
--- .../llvm/Analysis/TargetTransformInfo.h | 4 + .../llvm/Analysis/TargetTransformInfoImpl.h | 2 + llvm/lib/Analysis/TargetTransformInfo.cpp | 4 + .../Target/RISCV/RISCVTargetTransformInfo.h | 2 + .../Transforms/Vectorize/LoopVectorize.cpp | 12 + .../LoopVectorize/RISCV/reg-usage-bf16.ll | 11 +- .../LoopVectorize/RISCV/reg-usage-f16.ll | 21 +- .../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 233 ++++++++++++++++++ .../LoopVectorize/RISCV/reg-usage.ll | 68 ++--- 9 files changed, 309 insertions(+), 48 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index a5e98bb7bc137..a6f4e51e258ab 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1847,6 +1847,10 @@ class TargetTransformInfo { /// otherwise scalar epilogue loop. LLVM_ABI bool preferEpilogueVectorization() const; + /// \returns True if the loop vectorizer should discard any VFs where the + /// maximum register pressure exceeds getNumberOfRegisters. + LLVM_ABI bool shouldConsiderVectorizationRegPressure() const; + /// \returns True if the target wants to expand the given reduction intrinsic /// into a shuffle sequence. 
LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index b58386b94bba4..566e1cf51631a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1105,6 +1105,8 @@ class TargetTransformInfoImplBase { virtual bool preferEpilogueVectorization() const { return true; } + virtual bool shouldConsiderVectorizationRegPressure() const { return false; } + virtual bool shouldExpandReduction(const IntrinsicInst *II) const { return true; } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 899806bf37348..09b50c5270e57 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1425,6 +1425,10 @@ bool TargetTransformInfo::preferEpilogueVectorization() const { return TTIImpl->preferEpilogueVectorization(); } +bool TargetTransformInfo::shouldConsiderVectorizationRegPressure() const { + return TTIImpl->shouldConsiderVectorizationRegPressure(); +} + TargetTransformInfo::VPLegalization TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { return TTIImpl->getVPLegalizationStrategy(VPI); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 6bd7d51daff69..47e0a250d285a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -141,6 +141,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase { return false; } + bool shouldConsiderVectorizationRegPressure() const override { return true; } + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 
b4acda80cfb93..c04b5cb10eac2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -393,6 +393,10 @@ static cl::opt EnableEarlyExitVectorization( cl::desc( "Enable vectorization of early exit loops with uncountable exits.")); +static cl::opt ConsiderRegPressure( + "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, + cl::desc("Discard VFs if their register pressure is too high.")); + // Likelyhood of bypassing the vectorized loop because there are zero trips left // after prolog. See `emitIterationCountCheck`. static constexpr uint32_t MinItersBypassWeights[] = {1, 127}; @@ -3693,6 +3697,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF( ElementCount VF) { + if (ConsiderRegPressure.getNumOccurrences()) + return ConsiderRegPressure; + + // TODO: We should eventually consider register pressure for all targets. The + // TTI hook is temporary whilst target-specific issues are being fixed. + if (TTI.shouldConsiderVectorizationRegPressure()) + return true; + if (!useMaxBandwidth(VF.isScalable() ? TargetTransformInfo::RGK_ScalableVector : TargetTransformInfo::RGK_FixedWidthVector)) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll index 346f1cbcc7e3d..097f05d222cf6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll @@ -1,14 +1,11 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s - -; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow -; unrolling. 
Calculate register pressure for all VPlans, not just unrolled ones, -; and remove. +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { ; CHECK-LABEL: add -; CHECK: LV(REG): Found max usage: 2 item -; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK: LV(REG): VF = vscale x 4 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll index b25bc485a9ca7..8bbfdf39a0624 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll @@ -1,20 +1,19 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN - -; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow -; unrolling. Calculate register pressure for all VPlans, not just unrolled ones, -; and remove. 
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { -; CHECK-LABEL: add -; ZVFH: LV(REG): Found max usage: 2 item -; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; ZVFH-LABEL: add +; ZVFH: LV(REG): VF = vscale x 4 +; ZVFH-NEXT: LV(REG): Found max usage: 2 item +; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item ; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; ZVFHMIN: LV(REG): Found max usage: 2 item -; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; ZVFHMIN-LABEL: add +; ZVFHMIN: LV(REG): VF = vscale x 4 +; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item +; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll new file mode 100644 index 0000000000000..42f12ec2e4859 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll @@ -0,0 +1,233 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s 
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=true -S < %s | FileCheck %s +; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=false -S < %s | FileCheck %s --check-prefix=NO-REG-PRESSURE-CHECK + +define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { +; CHECK-LABEL: define void @f( +; CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP0]], splat (i64 2) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 4) +; CHECK-NEXT: [[INDUCTION2:%.*]] = add zeroinitializer, [[TMP5]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND4:%.*]] = phi [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: 
[[TMP8:%.*]] = mul i64 4, [[TMP7]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sub [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], [[TMP13]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i8.nxv4p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) +; CHECK-NEXT: [[TMP15:%.*]] = sub [[VEC_IND3]], splat (i64 1) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], [[TMP15]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call @llvm.vp.gather.nxv4i8.nxv4p0( align 1 [[TMP16]], splat (i1 true), i32 [[TMP6]]) +; CHECK-NEXT: [[TMP17:%.*]] = sub [[VEC_IND4]], splat (i64 1) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], [[TMP17]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call @llvm.vp.gather.nxv4i8.nxv4p0( align 1 [[TMP18]], splat (i1 true), i32 [[TMP6]]) +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0 +; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv12i8( 
[[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER9]], [[WIDE_MASKED_GATHER10]]) +; CHECK-NEXT: call void @llvm.vp.store.nxv12i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1 +; CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]] +; CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1 +; CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1 +; CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]] +; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1 +; CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1 +; CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]] +; CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1 +; CHECK-NEXT: 
[[IV_MUL:%.*]] = mul i64 [[IV]], 3 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]] +; CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0 +; CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1 +; CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1 +; CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1 +; CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2 +; CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2 +; CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3 +; CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4 +; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024 +; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +; NO-REG-PRESSURE-CHECK-LABEL: define void @f( +; NO-REG-PRESSURE-CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-REG-PRESSURE-CHECK-NEXT: [[ENTRY:.*:]] +; NO-REG-PRESSURE-CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-REG-PRESSURE-CHECK: [[VECTOR_PH]]: +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.stepvector.nxv8i64() +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP0]], splat (i64 2) +; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv8i64() +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 3) +; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP3]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv8i64() +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 4) +; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION2:%.*]] = add zeroinitializer, [[TMP5]] +; NO-REG-PRESSURE-CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] +; NO-REG-PRESSURE-CHECK: [[VECTOR_BODY]]: +; NO-REG-PRESSURE-CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND4:%.*]] = phi [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]] +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]] +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]] +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP13:%.*]] = sub 
[[VEC_IND]], splat (i64 1) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], [[TMP13]] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv8i8.nxv8p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP15:%.*]] = sub [[VEC_IND3]], splat (i64 1) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], [[TMP15]] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call @llvm.vp.gather.nxv8i8.nxv8p0( align 1 [[TMP16]], splat (i1 true), i32 [[TMP6]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP17:%.*]] = sub [[VEC_IND4]], splat (i64 1) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], [[TMP17]] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call @llvm.vp.gather.nxv8i8.nxv8p0( align 1 [[TMP18]], splat (i1 true), i32 [[TMP6]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0 +; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv24i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER9]], [[WIDE_MASKED_GATHER10]]) +; NO-REG-PRESSURE-CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 +; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] +; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] +; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] +; NO-REG-PRESSURE-CHECK-NEXT: 
[[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-REG-PRESSURE-CHECK: [[MIDDLE_BLOCK]]: +; NO-REG-PRESSURE-CHECK-NEXT: br label %[[EXIT:.*]] +; NO-REG-PRESSURE-CHECK: [[SCALAR_PH]]: +; NO-REG-PRESSURE-CHECK-NEXT: br label %[[LOOP:.*]] +; NO-REG-PRESSURE-CHECK: [[LOOP]]: +; NO-REG-PRESSURE-CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ] +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]] +; NO-REG-PRESSURE-CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]] +; NO-REG-PRESSURE-CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]] +; NO-REG-PRESSURE-CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]] +; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0 +; NO-REG-PRESSURE-CHECK-NEXT: 
store i8 [[A]], ptr [[A_GEP1]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1 +; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2 +; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3 +; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4 +; NO-REG-PRESSURE-CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024 +; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; NO-REG-PRESSURE-CHECK: [[EXIT]]: +; NO-REG-PRESSURE-CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %wide.iv.0 = phi i64 [ 0, %entry ], [ %wide.iv.0.next, %loop ] + %wide.iv.1 = phi i64 [ 0, %entry ], [ %wide.iv.1.next, %loop ] + %wide.iv.2 = phi i64 [ 0, %entry ], [ %wide.iv.2.next, %loop ] + + %wide.iv.0.sub = sub i64 %wide.iv.0, 1 + %a.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.0.sub + %a = load i8, ptr %a.gep0 + + %wide.iv.1.sub = sub i64 %wide.iv.1, 1 + %b.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.1.sub + %b = load i8, ptr %b.gep0 + + %wide.iv.2.sub = sub i64 %wide.iv.2, 1 + %c.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.2.sub + %c = load i8, ptr %c.gep0 + + %iv.mul = mul i64 %iv, 3 + %base = getelementptr i8, ptr %p1, i64 %iv.mul + + %a.gep1 = getelementptr i8, ptr %base, i8 0 + store i8 %a, ptr %a.gep1 + + %b.gep1 = getelementptr i8, ptr %base, i8 1 + store i8 %b, ptr %b.gep1 + + %c.gep1 = getelementptr i8, ptr %base, i8 2 + store i8 %c, ptr %c.gep1 + + %iv.next = add i64 %iv, 1 + %wide.iv.0.next = add i64 %wide.iv.0, 2 + %wide.iv.1.next = add i64 %wide.iv.1, 3 + 
%wide.iv.2.next = add i64 %wide.iv.2, 4 + %done = icmp eq i64 %iv, 1024 + br i1 %done, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll index 116ccc9961795..99139da67bb78 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll @@ -5,50 +5,54 @@ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=1 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=1 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=2 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=2 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=4 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=4 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ ; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \ -; RUN: -riscv-v-register-bit-width-lmul=8 -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -riscv-v-register-bit-width-lmul=8 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8 -; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow -; unrolling. 
Calculate register pressure for all VPlans, not just unrolled ones, -; and remove. - define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { -; CHECK-LABEL: add +; CHECK-SCALAR-LABEL: add ; CHECK-SCALAR: LV(REG): VF = 1 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 2 item ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers ; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL1: LV(REG): Found max usage: 2 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL1-LABEL: add +; CHECK-LMUL1: LV(REG): VF = vscale x 2 +; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL2: LV(REG): Found max usage: 2 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL2-LABEL: add +; CHECK-LMUL2: LV(REG): VF = vscale x 4 +; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL4: LV(REG): Found max usage: 2 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL4-LABEL: add +; CHECK-LMUL4: LV(REG): VF = vscale x 8 +; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL4-NEXT: 
LV(REG): RegisterClass: RISCV::VRRC, 8 registers ; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-LMUL8: LV(REG): Found max usage: 2 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers +; CHECK-LMUL8-LABEL: add +; CHECK-LMUL8: LV(REG): VF = vscale x 16 +; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers ; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers @@ -76,22 +80,26 @@ for.body: } define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) { -; CHECK-LABEL: goo +; CHECK-SCALAR-LABEL: goo ; CHECK-SCALAR: LV(REG): VF = 1 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers -; CHECK-LMUL1: LV(REG): Found max usage: 2 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers -; CHECK-LMUL2: LV(REG): Found max usage: 2 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers -; CHECK-LMUL4: LV(REG): Found max usage: 2 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers -; CHECK-LMUL8: LV(REG): Found max usage: 2 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers +; CHECK-LMUL1: LV(REG): VF = vscale x 2 +; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers +; 
CHECK-LMUL2: LV(REG): VF = vscale x 4 +; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers +; CHECK-LMUL4: LV(REG): VF = vscale x 8 +; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers +; CHECK-LMUL8: LV(REG): VF = vscale x 16 +; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers entry: %cmp3 = icmp sgt i32 %n, 0 br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup From 76aba5d415fbf206e0d9443a5822fcd9244fa33f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 12 Sep 2025 09:11:08 +0200 Subject: [PATCH 080/734] [MC] Add parseSymbol() helper (NFC) (#158106) This combines parseIdentifier() + getOrCreateSymbol(). This should make it a bit easier if we want to change the parseIdentifier() API. 
--- llvm/include/llvm/MC/MCParser/MCAsmParser.h | 3 + llvm/lib/MC/MCParser/AsmParser.cpp | 53 ++++++----------- llvm/lib/MC/MCParser/COFFAsmParser.cpp | 66 +++++++-------------- llvm/lib/MC/MCParser/COFFMasmParser.cpp | 14 ++--- llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 44 +++++--------- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 32 ++++------ llvm/lib/MC/MCParser/MCAsmParser.cpp | 9 +++ llvm/lib/MC/MCParser/MasmParser.cpp | 22 +++---- llvm/lib/MC/MCParser/WasmAsmParser.cpp | 10 ++-- 9 files changed, 97 insertions(+), 156 deletions(-) diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index cb9bd5c600d52..e3f44a08db641 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -279,6 +279,9 @@ class LLVM_ABI MCAsmParser { /// Res to the identifier contents. virtual bool parseIdentifier(StringRef &Res) = 0; + /// Parse identifier and get or create symbol for it. + bool parseSymbol(MCSymbol *&Res); + /// Parse up to the end of statement and return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. 
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index fb183a10b3d37..5fa1539790c73 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3876,20 +3876,15 @@ bool AsmParser::parseDirectiveCVLoc() { /// ::= .cv_linetable FunctionId, FnStart, FnEnd bool AsmParser::parseDirectiveCVLinetable() { int64_t FunctionId; - StringRef FnStartName, FnEndName; + MCSymbol *FnStartSym, *FnEndSym; SMLoc Loc = getTok().getLoc(); if (parseCVFunctionId(FunctionId, ".cv_linetable") || parseComma() || parseTokenLoc(Loc) || - check(parseIdentifier(FnStartName), Loc, - "expected identifier in directive") || + check(parseSymbol(FnStartSym), Loc, "expected identifier in directive") || parseComma() || parseTokenLoc(Loc) || - check(parseIdentifier(FnEndName), Loc, - "expected identifier in directive")) + check(parseSymbol(FnEndSym), Loc, "expected identifier in directive")) return true; - MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName); - MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName); - getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym); return false; } @@ -3898,7 +3893,7 @@ bool AsmParser::parseDirectiveCVLinetable() { /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd bool AsmParser::parseDirectiveCVInlineLinetable() { int64_t PrimaryFunctionId, SourceFileId, SourceLineNum; - StringRef FnStartName, FnEndName; + MCSymbol *FnStartSym, *FnEndSym; SMLoc Loc = getTok().getLoc(); if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") || parseTokenLoc(Loc) || @@ -3908,16 +3903,14 @@ bool AsmParser::parseDirectiveCVInlineLinetable() { parseIntToken(SourceLineNum, "expected SourceLineNum") || check(SourceLineNum < 0, Loc, "Line number less than zero") || parseTokenLoc(Loc) || - check(parseIdentifier(FnStartName), Loc, "expected identifier") || + check(parseSymbol(FnStartSym), Loc, "expected identifier") || parseTokenLoc(Loc) || - 
check(parseIdentifier(FnEndName), Loc, "expected identifier")) + check(parseSymbol(FnEndSym), Loc, "expected identifier")) return true; if (parseEOL()) return true; - MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName); - MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName); getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym); @@ -3938,16 +3931,14 @@ bool AsmParser::parseDirectiveCVDefRange() { std::vector> Ranges; while (getLexer().is(AsmToken::Identifier)) { Loc = getLexer().getLoc(); - StringRef GapStartName; - if (parseIdentifier(GapStartName)) + MCSymbol *GapStartSym; + if (parseSymbol(GapStartSym)) return Error(Loc, "expected identifier in directive"); - MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName); Loc = getLexer().getLoc(); - StringRef GapEndName; - if (parseIdentifier(GapEndName)) + MCSymbol *GapEndSym; + if (parseSymbol(GapEndSym)) return Error(Loc, "expected identifier in directive"); - MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName); Ranges.push_back({GapStartSym, GapEndSym}); } @@ -4084,12 +4075,11 @@ bool AsmParser::parseDirectiveCVFileChecksumOffset() { /// ::= .cv_fpo_data procsym bool AsmParser::parseDirectiveCVFPOData() { SMLoc DirLoc = getLexer().getLoc(); - StringRef ProcName; - if (parseIdentifier(ProcName)) + MCSymbol *ProcSym; + if (parseSymbol(ProcSym)) return TokError("expected symbol name"); if (parseEOL()) return true; - MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); getStreamer().emitCVFPOData(ProcSym, DirLoc); return false; } @@ -4311,15 +4301,12 @@ bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { if (Encoding == dwarf::DW_EH_PE_omit) return false; - StringRef Name; + MCSymbol *Sym; if (check(!isValidEncoding(Encoding), "unsupported encoding.") || parseComma() || - check(parseIdentifier(Name), "expected identifier in directive") || - parseEOL()) + check(parseSymbol(Sym), 
"expected identifier in directive") || parseEOL()) return true; - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (IsPersonality) getStreamer().emitCFIPersonality(Sym, Encoding); else @@ -4920,13 +4907,10 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) { return true; SMLoc IDLoc = getLexer().getLoc(); - StringRef Name; - if (parseIdentifier(Name)) + MCSymbol *Sym; + if (parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (parseComma()) return true; @@ -5756,10 +5740,9 @@ bool AsmParser::parseDirectiveAddrsig() { } bool AsmParser::parseDirectiveAddrsigSym() { - StringRef Name; - if (check(parseIdentifier(Name), "expected identifier") || parseEOL()) + MCSymbol *Sym; + if (check(parseSymbol(Sym), "expected identifier") || parseEOL()) return true; - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitAddrsigSym(Sym); return false; } diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp index 9fb17488a9e9c..5dd79946d8779 100644 --- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp @@ -293,13 +293,11 @@ bool COFFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); if (getLexer().isNot(AsmToken::EndOfStatement)) { while (true) { - StringRef Name; + MCSymbol *Sym; - if (getParser().parseIdentifier(Name)) + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - getStreamer().emitSymbolAttribute(Sym, Attr); if (getLexer().is(AsmToken::EndOfStatement)) @@ -450,13 +448,11 @@ bool COFFAsmParser::parseDirectivePopSection(StringRef, SMLoc) { } bool COFFAsmParser::parseDirectiveDef(StringRef, SMLoc) { - StringRef SymbolName; + MCSymbol *Sym; - if 
(getParser().parseIdentifier(SymbolName)) + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName); - getStreamer().beginCOFFSymbolDef(Sym); Lex(); @@ -496,8 +492,8 @@ bool COFFAsmParser::parseDirectiveEndef(StringRef, SMLoc) { } bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); int64_t Offset = 0; @@ -517,8 +513,6 @@ bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) { "invalid '.secrel32' directive offset, can't be less " "than zero or greater than std::numeric_limits::max()"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSecRel32(Symbol, Offset); return false; @@ -526,8 +520,8 @@ bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) { bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) { auto parseOp = [&]() -> bool { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); int64_t Offset = 0; @@ -544,8 +538,6 @@ bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) { "than -2147483648 or greater than " "2147483647"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - getStreamer().emitCOFFImgRel32(Symbol, Offset); return false; }; @@ -556,75 +548,65 @@ bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) { } bool COFFAsmParser::parseDirectiveSafeSEH(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = 
getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSafeSEH(Symbol); return false; } bool COFFAsmParser::parseDirectiveSecIdx(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSectionIndex(Symbol); return false; } bool COFFAsmParser::parseDirectiveSymIdx(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSymbolIndex(Symbol); return false; } bool COFFAsmParser::parseDirectiveSecNum(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSecNumber(Symbol); return false; } bool COFFAsmParser::parseDirectiveSecOffset(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSecOffset(Symbol); return false; @@ -679,15 +661,13 
@@ bool COFFAsmParser::parseDirectiveLinkOnce(StringRef, SMLoc Loc) { } bool COFFAsmParser::parseSEHDirectiveStartProc(StringRef, SMLoc Loc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitWinCFIStartProc(Symbol, Loc); return false; @@ -718,8 +698,8 @@ bool COFFAsmParser::parseSEHDirectiveEndChained(StringRef, SMLoc Loc) { } bool COFFAsmParser::parseSEHDirectiveHandler(StringRef, SMLoc Loc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *handler; + if (getParser().parseSymbol(handler)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -736,8 +716,6 @@ bool COFFAsmParser::parseSEHDirectiveHandler(StringRef, SMLoc Loc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitWinEHHandler(handler, unwind, except, Loc); return false; diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index 1bb617b327f1e..ef2815b037f2f 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -443,8 +443,8 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) { if (!getStreamer().getCurrentFragment()) return Error(getTok().getLoc(), "expected section directive"); - StringRef Label; - if (getParser().parseIdentifier(Label)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return Error(Loc, "expected identifier for procedure"); if (getLexer().is(AsmToken::Identifier)) { StringRef nextVal = getTok().getString(); @@ -459,12 +459,12 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) { nextLoc = getTok().getLoc(); 
} } - auto *Sym = - static_cast(getContext().getOrCreateSymbol(Label)); // Define symbol as simple external function - Sym->setExternal(true); - Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); + auto *COFFSym = static_cast(Sym); + COFFSym->setExternal(true); + COFFSym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); bool Framed = false; if (getLexer().is(AsmToken::Identifier) && @@ -475,7 +475,7 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) { } getStreamer().emitLabel(Sym, Loc); - CurrentProcedures.push_back(Label); + CurrentProcedures.push_back(Sym->getName()); CurrentProceduresFramed.push_back(Framed); return false; } diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp index a9095b3298f5e..fceb718d091c9 100644 --- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -501,13 +501,10 @@ bool DarwinAsmParser::parseSectionSwitch(StringRef Segment, StringRef Section, /// parseDirectiveAltEntry /// ::= .alt_entry identifier bool DarwinAsmParser::parseDirectiveAltEntry(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Look up symbol. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (Sym->isDefined()) return TokError(".alt_entry must preceed symbol definition"); @@ -521,13 +518,10 @@ bool DarwinAsmParser::parseDirectiveAltEntry(StringRef, SMLoc) { /// parseDirectiveDesc /// ::= .desc identifier , expression bool DarwinAsmParser::parseDirectiveDesc(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.desc' directive"); Lex(); @@ -560,18 +554,17 @@ bool DarwinAsmParser::parseDirectiveIndirectSymbol(StringRef, SMLoc Loc) { return Error(Loc, "indirect symbol not in a symbol pointer or stub " "section"); - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in .indirect_symbol directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - // Assembler local symbols don't make any sense here. Complain loudly. if (Sym->isTemporary()) return TokError("non-local symbol required in directive"); if (!getStreamer().emitSymbolAttribute(Sym, MCSA_IndirectSymbol)) - return TokError("unable to emit indirect symbol attribute for: " + Name); + return TokError("unable to emit indirect symbol attribute for: " + + Sym->getName()); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.indirect_symbol' directive"); @@ -633,13 +626,10 @@ bool DarwinAsmParser::parseDirectiveLinkerOption(StringRef IDVal, SMLoc) { /// parseDirectiveLsym /// ::= .lsym identifier , expression bool DarwinAsmParser::parseDirectiveLsym(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.lsym' directive"); Lex(); @@ -826,13 +816,10 @@ bool DarwinAsmParser::parseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) { /// ::= .tbss identifier, size, align bool DarwinAsmParser::parseDirectiveTBSS(StringRef, SMLoc) { SMLoc IDLoc = getLexer().getLoc(); - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); @@ -911,13 +898,10 @@ bool DarwinAsmParser::parseDirectiveZerofill(StringRef, SMLoc) { Lex(); SMLoc IDLoc = getLexer().getLoc(); - StringRef IDStr; - if (getParser().parseIdentifier(IDStr)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(IDStr); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 513f3b3da7813..19da9f57a4a6f 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -196,10 +196,9 @@ bool ELFAsmParser::parseSectionSwitch(StringRef Section, unsigned Type, } bool ELFAsmParser::parseDirectiveSize(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier"); - auto *Sym = static_cast(getContext().getOrCreateSymbol(Name)); if (getLexer().isNot(AsmToken::Comma)) return TokError("expected comma"); @@ -712,13 +711,10 @@ static MCSymbolAttr MCAttrForString(StringRef Type) { /// ::= .type identifier , %attribute /// ::= .type identifier , "attribute" bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - bool AllowAt = getLexer().getAllowAtInIdentifier(); if (!AllowAt && !getContext().getAsmInfo()->getCommentString().starts_with("@")) @@ -790,8 +786,9 @@ bool ELFAsmParser::parseDirectiveIdent(StringRef, SMLoc) { /// parseDirectiveSymver /// ::= .symver foo, bar2@zed bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) { - StringRef OriginalName, Name, Action; - if (getParser().parseIdentifier(OriginalName)) + MCSymbol *OriginalSym; + StringRef Name, Action; + if (getParser().parseSymbol(OriginalSym)) return TokError("expected identifier"); if (getLexer().isNot(AsmToken::Comma)) @@ -819,8 +816,7 @@ bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) { } (void)parseOptionalToken(AsmToken::EndOfStatement); - getStreamer().emitELFSymverDirective( - getContext().getOrCreateSymbol(OriginalName), Name, KeepOriginalSym); + getStreamer().emitELFSymverDirective(OriginalSym, Name, KeepOriginalSym); return false; } @@ -853,8 +849,8 @@ bool ELFAsmParser::parseDirectiveVersion(StringRef, SMLoc) { bool ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) { // FIXME: Share code with the other alias building directives. 
- StringRef AliasName; - if (getParser().parseIdentifier(AliasName)) + MCSymbol *Alias; + if (getParser().parseSymbol(Alias)) return TokError("expected identifier"); if (getLexer().isNot(AsmToken::Comma)) @@ -862,14 +858,10 @@ bool ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) { Lex(); - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier"); - MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); - - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - getStreamer().emitWeakReference(Alias, Sym); return false; } diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp index 68b9cab2492f5..3721541c71e11 100644 --- a/llvm/lib/MC/MCParser/MCAsmParser.cpp +++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp @@ -163,6 +163,15 @@ bool MCAsmParser::parseGNUAttribute(SMLoc L, int64_t &Tag, return true; } +bool MCAsmParser::parseSymbol(MCSymbol *&Res) { + StringRef Name; + if (parseIdentifier(Name)) + return true; + + Res = getContext().getOrCreateSymbol(Name); + return false; +} + void MCParsedAsmOperand::dump() const { // Cannot completely remove virtual function even in release mode. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 2dcfe0f3a420a..b38c2f7e41634 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -4503,9 +4503,9 @@ bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) { bool MasmParser::parseDirectiveExtern() { // .extern is the default - but we still need to take any provided type info. 
auto parseOp = [&]() -> bool { - StringRef Name; + MCSymbol *Sym; SMLoc NameLoc = getTok().getLoc(); - if (parseIdentifier(Name)) + if (parseSymbol(Sym)) return Error(NameLoc, "expected name"); if (parseToken(AsmToken::Colon)) return true; @@ -4518,12 +4518,10 @@ bool MasmParser::parseDirectiveExtern() { AsmTypeInfo Type; if (lookUpType(TypeName, Type)) return Error(TypeLoc, "unrecognized type"); - KnownType[Name.lower()] = Type; + KnownType[Sym->getName().lower()] = Type; } - auto *Sym = - static_cast(getContext().getOrCreateSymbol(Name)); - Sym->setExternal(true); + static_cast(Sym)->setExternal(true); getStreamer().emitSymbolAttribute(Sym, MCSA_Extern); return false; @@ -4538,11 +4536,10 @@ bool MasmParser::parseDirectiveExtern() { /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) { auto parseOp = [&]() -> bool { - StringRef Name; SMLoc Loc = getTok().getLoc(); - if (parseIdentifier(Name)) + MCSymbol *Sym; + if (parseSymbol(Sym)) return Error(Loc, "expected identifier"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); // Assembler local symbols don't make any sense here. Complain loudly. if (Sym->isTemporary()) @@ -4565,13 +4562,10 @@ bool MasmParser::parseDirectiveComm(bool IsLocal) { return true; SMLoc IDLoc = getLexer().getLoc(); - StringRef Name; - if (parseIdentifier(Name)) + MCSymbol *Sym; + if (parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp index ddfe1e10d9d0a..1befcacb3952f 100644 --- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp +++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp @@ -212,10 +212,9 @@ class WasmAsmParser : public MCAsmParserExtension { // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize // so maybe could be shared somehow. bool parseDirectiveSize(StringRef, SMLoc Loc) { - StringRef Name; - if (Parser->parseIdentifier(Name)) + MCSymbol *Sym; + if (Parser->parseSymbol(Sym)) return TokError("expected identifier in directive"); - auto Sym = getContext().getOrCreateSymbol(Name); if (expect(AsmToken::Comma, ",")) return true; const MCExpr *Expr; @@ -293,10 +292,9 @@ class WasmAsmParser : public MCAsmParserExtension { assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); if (getLexer().isNot(AsmToken::EndOfStatement)) { while (true) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitSymbolAttribute(Sym, Attr); if (getLexer().is(AsmToken::EndOfStatement)) break; From 3a2c8f7af8b38dd17649a42fc1f291d47f6e175d Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Fri, 12 Sep 2025 15:23:38 +0800 Subject: [PATCH 081/734] [RISCV] Move MachineCombiner to addILPOpts() (#158071) So that it runs before `MachineCSE` and other passes. Fixes https://github.com/llvm/llvm-project/issues/158063. 
--- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 11 ++- llvm/test/CodeGen/RISCV/O3-pipeline.ll | 6 +- llvm/test/CodeGen/RISCV/machine-combiner.ll | 43 ++++----- llvm/test/CodeGen/RISCV/neg-abs.ll | 24 ++--- .../fixed-vectors-strided-load-store-asm.ll | 2 +- .../RISCV/rvv/vxrm-insert-out-of-loop.ll | 89 ++++++++++--------- .../CodeGen/RISCV/short-forward-branch-opt.ll | 4 +- 7 files changed, 87 insertions(+), 92 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index a1ec24f1fe719..0668b3896fa2d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -395,6 +395,7 @@ class RISCVPassConfig : public TargetPassConfig { void addPreRegAlloc() override; void addPostRegAlloc() override; void addFastRegAlloc() override; + bool addILPOpts() override; std::unique_ptr getCSEConfig() const override; }; @@ -580,9 +581,6 @@ void RISCVPassConfig::addMachineSSAOptimization() { TargetPassConfig::addMachineSSAOptimization(); - if (EnableMachineCombiner) - addPass(&MachineCombinerID); - if (TM->getTargetTriple().isRISCV64()) { addPass(createRISCVOptWInstrsPass()); } @@ -617,6 +615,13 @@ void RISCVPassConfig::addPostRegAlloc() { addPass(createRISCVRedundantCopyEliminationPass()); } +bool RISCVPassConfig::addILPOpts() { + if (EnableMachineCombiner) + addPass(&MachineCombinerID); + + return true; +} + void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM, OptimizationLevel Level) { diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index c7f70a9d266c2..ea08061221fd4 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -107,6 +107,9 @@ ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: 
Machine Trace Metrics +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine InstCombiner ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Early Machine Loop Invariant Code Motion ; CHECK-NEXT: MachineDominator Tree Construction @@ -117,9 +120,6 @@ ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions -; CHECK-NEXT: Machine Trace Metrics -; CHECK-NEXT: Lazy Machine Block Frequency Analysis -; CHECK-NEXT: Machine InstCombiner ; RV64-NEXT: RISC-V Optimize W Instructions ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Merge Base Offset diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll index 7a1c41c1839fa..69eca6dd7768a 100644 --- a/llvm/test/CodeGen/RISCV/machine-combiner.ll +++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll @@ -1094,33 +1094,19 @@ declare float @llvm.maxnum.f32(float, float) declare double @llvm.maxnum.f64(double, double) define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) { -; CHECK_LOCAL-LABEL: test_fmadd_strategy: -; CHECK_LOCAL: # %bb.0: # %entry -; CHECK_LOCAL-NEXT: fsub.d fa4, fa0, fa1 -; CHECK_LOCAL-NEXT: andi a0, a0, 1 -; CHECK_LOCAL-NEXT: fmv.d fa5, fa0 -; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2 -; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2 -; CHECK_LOCAL-NEXT: # %bb.1: # %entry -; CHECK_LOCAL-NEXT: fmul.d fa4, fa5, fa1 -; CHECK_LOCAL-NEXT: fmadd.d fa5, fa5, fa1, fa0 -; CHECK_LOCAL-NEXT: fsub.d fa0, fa5, fa4 -; CHECK_LOCAL-NEXT: .LBB76_2: # %entry -; CHECK_LOCAL-NEXT: ret -; -; CHECK_GLOBAL-LABEL: test_fmadd_strategy: -; CHECK_GLOBAL: # %bb.0: # %entry -; CHECK_GLOBAL-NEXT: fsub.d fa4, fa0, fa1 -; CHECK_GLOBAL-NEXT: andi a0, a0, 1 -; CHECK_GLOBAL-NEXT: fmv.d fa5, fa0 -; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2 -; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2 -; CHECK_GLOBAL-NEXT: # %bb.1: # %entry -; CHECK_GLOBAL-NEXT: fmul.d fa5, fa5, 
fa1 -; CHECK_GLOBAL-NEXT: fadd.d fa4, fa5, fa0 -; CHECK_GLOBAL-NEXT: fsub.d fa0, fa4, fa5 -; CHECK_GLOBAL-NEXT: .LBB76_2: # %entry -; CHECK_GLOBAL-NEXT: ret +; CHECK-LABEL: test_fmadd_strategy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsub.d fa5, fa0, fa1 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: beqz a0, .LBB76_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: fmul.d fa4, fa0, fa1 +; CHECK-NEXT: fmadd.d fa5, fa5, fa2, fa4 +; CHECK-NEXT: fsub.d fa0, fa5, fa4 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB76_2: +; CHECK-NEXT: fmul.d fa0, fa5, fa2 +; CHECK-NEXT: ret entry: %sub = fsub contract double %a0, %a1 %mul = fmul contract double %sub, %a2 @@ -1132,3 +1118,6 @@ entry: %retval.0 = select i1 %tobool.not, double %mul, double %sub3 ret double %retval.0 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK_GLOBAL: {{.*}} +; CHECK_LOCAL: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index da81fe5708814..f9ccf7637eee9 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -208,14 +208,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: snez a3, a0 -; RV32I-NEXT: neg a4, a1 -; RV32I-NEXT: sub a3, a4, a3 -; RV32I-NEXT: neg a4, a0 +; RV32I-NEXT: snez a4, a0 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: add a4, a1, a4 +; RV32I-NEXT: neg a4, a4 ; RV32I-NEXT: sw a0, 0(a2) ; RV32I-NEXT: sw a1, 4(a2) -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: neg_abs64_multiuse: @@ -227,14 +227,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: .LBB5_2: -; RV32ZBB-NEXT: snez a3, a0 -; RV32ZBB-NEXT: neg a4, a1 -; RV32ZBB-NEXT: sub a3, a4, a3 -; RV32ZBB-NEXT: neg a4, a0 +; RV32ZBB-NEXT: snez a4, a0 +; 
RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: add a4, a1, a4 +; RV32ZBB-NEXT: neg a4, a4 ; RV32ZBB-NEXT: sw a0, 0(a2) ; RV32ZBB-NEXT: sw a1, 4(a2) -; RV32ZBB-NEXT: mv a0, a4 -; RV32ZBB-NEXT: mv a1, a3 +; RV32ZBB-NEXT: mv a0, a3 +; RV32ZBB-NEXT: mv a1, a4 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: neg_abs64_multiuse: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 83b435ddff902..056f55260b854 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -934,7 +934,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: add a1, a1, a5 ; CHECK-NEXT: slli a3, a3, 32 ; CHECK-NEXT: srli a3, a3, 32 -; CHECK-NEXT: add a0, a4, a0 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: .LBB14_6: # %bb35 diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index dddcd4f107e3b..ead79fcf53d8b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -18,13 +18,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph ; RV32-NEXT: blez a6, .LBB0_17 ; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader -; RV32-NEXT: addi t0, a7, -1 +; RV32-NEXT: addi t3, a7, -1 ; RV32-NEXT: csrr t2, vlenb -; RV32-NEXT: mul t3, a1, t0 -; RV32-NEXT: mul t4, a3, t0 -; RV32-NEXT: mul t5, a5, t0 ; RV32-NEXT: slli t1, t2, 1 -; RV32-NEXT: li t6, 32 +; RV32-NEXT: li t4, 32 ; RV32-NEXT: mv t0, t1 ; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader ; RV32-NEXT: li t0, 32 @@ -34,27 +31,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: sw s0, 
12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 ; RV32-NEXT: .cfi_offset s1, -8 ; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: .cfi_offset s3, -16 ; RV32-NEXT: .cfi_remember_state -; RV32-NEXT: add t3, a0, t3 -; RV32-NEXT: add t4, a2, t4 -; RV32-NEXT: add s0, a4, t5 -; RV32-NEXT: bltu t6, t1, .LBB0_6 +; RV32-NEXT: mul t5, a1, t3 +; RV32-NEXT: add s0, a0, a6 +; RV32-NEXT: mul t6, a3, t3 +; RV32-NEXT: add s2, a2, a6 +; RV32-NEXT: mul s1, a5, t3 +; RV32-NEXT: add s3, a4, a6 +; RV32-NEXT: bltu t4, t1, .LBB0_6 ; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader ; RV32-NEXT: li t1, 32 ; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader -; RV32-NEXT: add t3, t3, a6 -; RV32-NEXT: add t5, t4, a6 -; RV32-NEXT: add t4, s0, a6 +; RV32-NEXT: add t3, s0, t5 +; RV32-NEXT: add t6, s2, t6 +; RV32-NEXT: add t4, s3, s1 ; RV32-NEXT: j .LBB0_8 ; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader ; RV32-NEXT: mv t1, t0 ; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader ; RV32-NEXT: .cfi_restore_state ; RV32-NEXT: li t0, 0 -; RV32-NEXT: sltu t5, a0, t5 +; RV32-NEXT: sltu t5, a0, t6 ; RV32-NEXT: sltu t6, a2, t3 ; RV32-NEXT: and t5, t5, t6 ; RV32-NEXT: sltu t4, a0, t4 @@ -140,9 +142,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore s0 ; RV32-NEXT: .cfi_restore s1 ; RV32-NEXT: .cfi_restore s2 +; RV32-NEXT: .cfi_restore s3 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: .LBB0_17: # %for.cond.cleanup @@ -190,7 +194,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64P670-NEXT: or t6, s0, s1 
; RV64P670-NEXT: sltu s1, a0, t5 ; RV64P670-NEXT: sltu s0, a4, t4 -; RV64P670-NEXT: mv t5, a0 +; RV64P670-NEXT: add t4, a0, a6 ; RV64P670-NEXT: and s0, s0, s1 ; RV64P670-NEXT: or s1, a1, a5 ; RV64P670-NEXT: srli s1, s1, 63 @@ -200,11 +204,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64P670-NEXT: or s0, t6, s0 ; RV64P670-NEXT: sltu s1, a6, s1 ; RV64P670-NEXT: or s0, s0, s1 -; RV64P670-NEXT: andi t4, s0, 1 +; RV64P670-NEXT: andi t5, s0, 1 ; RV64P670-NEXT: j .LBB0_4 ; RV64P670-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us ; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1 -; RV64P670-NEXT: add t5, t5, a1 +; RV64P670-NEXT: add a0, a0, a1 ; RV64P670-NEXT: add a2, a2, a3 ; RV64P670-NEXT: add a4, a4, a5 ; RV64P670-NEXT: addiw t1, t1, 1 @@ -214,7 +218,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64P670-NEXT: # =>This Loop Header: Depth=1 ; RV64P670-NEXT: # Child Loop BB0_7 Depth 2 ; RV64P670-NEXT: # Child Loop BB0_10 Depth 2 -; RV64P670-NEXT: beqz t4, .LBB0_6 +; RV64P670-NEXT: beqz t5, .LBB0_6 ; RV64P670-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1 ; RV64P670-NEXT: li t6, 0 ; RV64P670-NEXT: j .LBB0_9 @@ -223,7 +227,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64P670-NEXT: slli s1, t2, 28 ; RV64P670-NEXT: mv s2, a2 ; RV64P670-NEXT: mv s3, a4 -; RV64P670-NEXT: mv s4, t5 +; RV64P670-NEXT: mv s4, a0 ; RV64P670-NEXT: sub s1, s1, t3 ; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma ; RV64P670-NEXT: and t6, s1, a6 @@ -246,11 +250,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64P670-NEXT: .LBB0_9: # %for.body4.us.preheader ; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1 ; RV64P670-NEXT: mul s2, a1, t0 -; RV64P670-NEXT: add s0, a0, a6 -; RV64P670-NEXT: add s1, t5, t6 +; RV64P670-NEXT: add s1, a0, t6 ; RV64P670-NEXT: add s4, a4, t6 ; RV64P670-NEXT: add t6, t6, a2 -; RV64P670-NEXT: add s2, 
s2, s0 +; RV64P670-NEXT: add s2, s2, t4 ; RV64P670-NEXT: .LBB0_10: # %for.body4.us ; RV64P670-NEXT: # Parent Loop BB0_4 Depth=1 ; RV64P670-NEXT: # => This Inner Loop Header: Depth=2 @@ -332,12 +335,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: or s0, t4, s0 ; RV64X60-NEXT: sltu s1, a6, s1 ; RV64X60-NEXT: or s0, s0, s1 -; RV64X60-NEXT: andi t4, s0, 1 -; RV64X60-NEXT: mv t5, a0 +; RV64X60-NEXT: add t4, a0, a6 +; RV64X60-NEXT: andi t5, s0, 1 ; RV64X60-NEXT: j .LBB0_4 ; RV64X60-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us ; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1 -; RV64X60-NEXT: add t5, t5, a1 +; RV64X60-NEXT: add a0, a0, a1 ; RV64X60-NEXT: add a2, a2, a3 ; RV64X60-NEXT: addiw t1, t1, 1 ; RV64X60-NEXT: add a4, a4, a5 @@ -347,7 +350,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: # =>This Loop Header: Depth=1 ; RV64X60-NEXT: # Child Loop BB0_7 Depth 2 ; RV64X60-NEXT: # Child Loop BB0_10 Depth 2 -; RV64X60-NEXT: beqz t4, .LBB0_6 +; RV64X60-NEXT: beqz t5, .LBB0_6 ; RV64X60-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1 ; RV64X60-NEXT: li t6, 0 ; RV64X60-NEXT: j .LBB0_9 @@ -358,7 +361,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: and t6, s1, a6 ; RV64X60-NEXT: mv s2, a2 ; RV64X60-NEXT: mv s3, a4 -; RV64X60-NEXT: mv s4, t5 +; RV64X60-NEXT: mv s4, a0 ; RV64X60-NEXT: mv s1, t6 ; RV64X60-NEXT: vsetvli s0, zero, e8, m2, ta, ma ; RV64X60-NEXT: .LBB0_7: # %vector.body @@ -379,9 +382,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: .LBB0_9: # %for.body4.us.preheader ; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1 ; RV64X60-NEXT: mul s2, a1, t0 -; RV64X60-NEXT: add s1, a0, a6 -; RV64X60-NEXT: add s0, t5, t6 -; RV64X60-NEXT: add s2, s2, s1 +; RV64X60-NEXT: add s0, a0, t6 +; RV64X60-NEXT: add s2, s2, t4 ; RV64X60-NEXT: add s4, a4, t6 ; 
RV64X60-NEXT: add t6, t6, a2 ; RV64X60-NEXT: .LBB0_10: # %for.body4.us @@ -466,16 +468,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64-NEXT: or s0, a1, a5 ; RV64-NEXT: srli s0, s0, 63 ; RV64-NEXT: or t5, t5, s0 +; RV64-NEXT: sltu s0, a6, t4 ; RV64-NEXT: or t5, t6, t5 -; RV64-NEXT: sltu t4, a6, t4 -; RV64-NEXT: or t4, t4, t5 -; RV64-NEXT: andi t4, t4, 1 -; RV64-NEXT: mv t5, a0 +; RV64-NEXT: add t4, a0, a6 +; RV64-NEXT: or t5, s0, t5 +; RV64-NEXT: andi t5, t5, 1 ; RV64-NEXT: csrwi vxrm, 0 ; RV64-NEXT: j .LBB0_6 ; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us ; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1 -; RV64-NEXT: add t5, t5, a1 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: addiw t3, t3, 1 @@ -485,7 +487,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64-NEXT: # =>This Loop Header: Depth=1 ; RV64-NEXT: # Child Loop BB0_9 Depth 2 ; RV64-NEXT: # Child Loop BB0_12 Depth 2 -; RV64-NEXT: beqz t4, .LBB0_8 +; RV64-NEXT: beqz t5, .LBB0_8 ; RV64-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1 ; RV64-NEXT: li t6, 0 ; RV64-NEXT: j .LBB0_11 @@ -496,7 +498,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64-NEXT: and t6, t6, a6 ; RV64-NEXT: mv s0, a2 ; RV64-NEXT: mv s1, a4 -; RV64-NEXT: mv s2, t5 +; RV64-NEXT: mv s2, a0 ; RV64-NEXT: mv s3, t6 ; RV64-NEXT: vsetvli s4, zero, e8, m2, ta, ma ; RV64-NEXT: .LBB0_9: # %vector.body @@ -516,25 +518,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64-NEXT: beq t6, a6, .LBB0_5 ; RV64-NEXT: .LBB0_11: # %for.body4.us.preheader ; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1 -; RV64-NEXT: mul s1, a1, t2 -; RV64-NEXT: add s2, a0, a6 -; RV64-NEXT: add s0, t5, t6 -; RV64-NEXT: add s1, s2, s1 -; RV64-NEXT: add s2, a4, t6 +; RV64-NEXT: mul s2, a1, t2 +; RV64-NEXT: add s0, a0, t6 +; RV64-NEXT: add s1, a4, t6 +; 
RV64-NEXT: add s2, t4, s2 ; RV64-NEXT: add t6, a2, t6 ; RV64-NEXT: .LBB0_12: # %for.body4.us ; RV64-NEXT: # Parent Loop BB0_6 Depth=1 ; RV64-NEXT: # => This Inner Loop Header: Depth=2 ; RV64-NEXT: lbu s3, 0(t6) -; RV64-NEXT: lbu s4, 0(s2) +; RV64-NEXT: lbu s4, 0(s1) ; RV64-NEXT: add s3, s3, s4 ; RV64-NEXT: addi s3, s3, 1 ; RV64-NEXT: srli s3, s3, 1 ; RV64-NEXT: sb s3, 0(s0) ; RV64-NEXT: addi s0, s0, 1 -; RV64-NEXT: addi s2, s2, 1 +; RV64-NEXT: addi s1, s1, 1 ; RV64-NEXT: addi t6, t6, 1 -; RV64-NEXT: bne s0, s1, .LBB0_12 +; RV64-NEXT: bne s0, s2, .LBB0_12 ; RV64-NEXT: j .LBB0_5 ; RV64-NEXT: .LBB0_13: ; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll index 59a702ab6b17f..1bfeeb92e06dd 100644 --- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll +++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll @@ -2075,14 +2075,14 @@ define i64 @abs_i64(i64 %x) { ; RV32SFB-LABEL: abs_i64: ; RV32SFB: # %bb.0: ; RV32SFB-NEXT: snez a2, a0 -; RV32SFB-NEXT: add a2, a2, a1 +; RV32SFB-NEXT: neg a3, a1 ; RV32SFB-NEXT: bgez a1, .LBB35_2 ; RV32SFB-NEXT: # %bb.1: ; RV32SFB-NEXT: neg a0, a0 ; RV32SFB-NEXT: .LBB35_2: ; RV32SFB-NEXT: bgez a1, .LBB35_4 ; RV32SFB-NEXT: # %bb.3: -; RV32SFB-NEXT: neg a1, a2 +; RV32SFB-NEXT: sub a1, a3, a2 ; RV32SFB-NEXT: .LBB35_4: ; RV32SFB-NEXT: ret %a = call i64 @llvm.abs.i64(i64 %x, i1 false) From 152d0f5c0c0eaea369bf534b673d7625700ca7ef Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 12 Sep 2025 00:26:54 -0700 Subject: [PATCH 082/734] [Support] Deprecate one form of support::endian::write (NFC) (#156140) We have two forms of write: template inline void write(void *memory, value_type value, endianness endian) template inline void write(void *memory, value_type value) The difference is that endian is a function parameter in the former but a template parameter in the latter. 
This patch streamlines the code by migrating the use of the latter to the former while deprecating the latter. I'm planning to do the same for byte_swap and read in follow-up patches to keep this patch simple and small. --- llvm/include/llvm/Support/Endian.h | 13 ++++++------- llvm/lib/ObjectYAML/GOFFEmitter.cpp | 4 ++-- llvm/unittests/Support/EndianTest.cpp | 14 +++++++------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h index 02a3194e09784..7eb1d7e8dfe7f 100644 --- a/llvm/include/llvm/Support/Endian.h +++ b/llvm/include/llvm/Support/Endian.h @@ -96,9 +96,8 @@ inline void write(void *memory, value_type value, endianness endian) { &value, sizeof(value_type)); } -template +template +LLVM_DEPRECATED("Pass endian as a function argument instead", "write") inline void write(void *memory, value_type value) { write(memory, value, endian); } @@ -163,7 +162,7 @@ inline void writeAtBitAlignment(void *memory, value_type value, uint64_t startBit) { assert(startBit < 8); if (startBit == 0) - write(memory, value); + write(memory, value, endian); else { // Read two values and shift the result into them. 
value_type val[2]; @@ -230,8 +229,8 @@ struct packed_endian_specific_integral { operator value_type() const { return value(); } void operator=(value_type newValue) { - endian::write( - (void*)Value.buffer, newValue); + endian::write((void *)Value.buffer, newValue, + endian); } packed_endian_specific_integral &operator+=(value_type newValue) { @@ -268,7 +267,7 @@ struct packed_endian_specific_integral { } void operator=(value_type NewValue) { - endian::write(Ptr, NewValue); + endian::write(Ptr, NewValue, endian); } private: diff --git a/llvm/lib/ObjectYAML/GOFFEmitter.cpp b/llvm/lib/ObjectYAML/GOFFEmitter.cpp index 7e94ac609a030..c26893cfaa720 100644 --- a/llvm/lib/ObjectYAML/GOFFEmitter.cpp +++ b/llvm/lib/ObjectYAML/GOFFEmitter.cpp @@ -38,8 +38,8 @@ template struct BinaryBeImpl { template raw_ostream &operator<<(raw_ostream &OS, const BinaryBeImpl &BBE) { char Buffer[sizeof(BBE.Value)]; - support::endian::write( - Buffer, BBE.Value); + support::endian::write(Buffer, BBE.Value, + llvm::endianness::big); OS.write(Buffer, sizeof(BBE.Value)); return OS; } diff --git a/llvm/unittests/Support/EndianTest.cpp b/llvm/unittests/Support/EndianTest.cpp index 59281c0ed5444..c48b7707b7751 100644 --- a/llvm/unittests/Support/EndianTest.cpp +++ b/llvm/unittests/Support/EndianTest.cpp @@ -201,26 +201,26 @@ TEST(Endian, WriteBitAligned) { TEST(Endian, Write) { unsigned char data[5]; - endian::write(data, -1362446643); + endian::write(data, -1362446643, llvm::endianness::big); EXPECT_EQ(data[0], 0xAE); EXPECT_EQ(data[1], 0xCA); EXPECT_EQ(data[2], 0xB6); EXPECT_EQ(data[3], 0xCD); - endian::write(data + 1, - -1362446643); + endian::write(data + 1, -1362446643, + llvm::endianness::big); EXPECT_EQ(data[1], 0xAE); EXPECT_EQ(data[2], 0xCA); EXPECT_EQ(data[3], 0xB6); EXPECT_EQ(data[4], 0xCD); - endian::write(data, - -1362446643); + endian::write(data, -1362446643, + llvm::endianness::little); EXPECT_EQ(data[0], 0xCD); EXPECT_EQ(data[1], 0xB6); EXPECT_EQ(data[2], 0xCA); EXPECT_EQ(data[3], 
0xAE); - endian::write(data + 1, - -1362446643); + endian::write(data + 1, -1362446643, + llvm::endianness::little); EXPECT_EQ(data[1], 0xCD); EXPECT_EQ(data[2], 0xB6); EXPECT_EQ(data[3], 0xCA); From a7521a81c4b7aa135086488a566eab2dbc6b1326 Mon Sep 17 00:00:00 2001 From: Boyao Wang Date: Fri, 12 Sep 2025 15:38:41 +0800 Subject: [PATCH 083/734] [RISCV][MC] Add MC support of Zibi experimental extension (#127463) This adds the MC support of Zibi v0.1 experimental extension. References: * https://lf-riscv.atlassian.net/wiki/spaces/USXX/pages/599261201/Branch+with+Immediate+Zibi+Ratification+Plan * https://lf-riscv.atlassian.net/browse/RVS-3828 * https://github.com/riscv/zibi/releases/tag/v0.1.0 --- .../Driver/print-supported-extensions-riscv.c | 1 + .../test/Preprocessor/riscv-target-features.c | 9 +++ llvm/docs/RISCVUsage.rst | 3 + llvm/docs/ReleaseNotes.md | 1 + .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 9 +++ .../RISCV/Disassembler/RISCVDisassembler.cpp | 8 +++ .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1 + .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 17 +++++ llvm/lib/Target/RISCV/RISCVFeatures.td | 6 ++ llvm/lib/Target/RISCV/RISCVInstrFormats.td | 16 +++++ llvm/lib/Target/RISCV/RISCVInstrInfo.td | 1 + llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td | 44 +++++++++++++ llvm/test/CodeGen/RISCV/attributes.ll | 4 ++ llvm/test/CodeGen/RISCV/features-info.ll | 1 + llvm/test/MC/RISCV/zibi-invalid.s | 34 ++++++++++ llvm/test/MC/RISCV/zibi-valid.s | 63 +++++++++++++++++++ .../TargetParser/RISCVISAInfoTest.cpp | 1 + 17 files changed, 219 insertions(+) create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td create mode 100644 llvm/test/MC/RISCV/zibi-invalid.s create mode 100644 llvm/test/MC/RISCV/zibi-valid.s diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 413275dba8438..f619d32254d15 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ 
b/clang/test/Driver/print-supported-extensions-riscv.c @@ -212,6 +212,7 @@ // CHECK-EMPTY: // CHECK-NEXT: Experimental extensions // CHECK-NEXT: p 0.15 'P' ('Base P' (Packed SIMD)) +// CHECK-NEXT: zibi 0.1 'Zibi' (Branch with Immediate) // CHECK-NEXT: zicfilp 1.0 'Zicfilp' (Landing pad) // CHECK-NEXT: zicfiss 1.0 'Zicfiss' (Shadow stack) // CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions) diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 204c9851e680c..0dcdb29445b4b 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -96,6 +96,7 @@ // CHECK-NOT: __riscv_zfinx {{.*$}} // CHECK-NOT: __riscv_zhinx {{.*$}} // CHECK-NOT: __riscv_zhinxmin {{.*$}} +// CHECK-NOT: __riscv_zibi {{.*$}} // CHECK-NOT: __riscv_zic64b {{.*$}} // CHECK-NOT: __riscv_zicbom {{.*$}} // CHECK-NOT: __riscv_zicbop {{.*$}} @@ -812,6 +813,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZHINXMIN-EXT %s // CHECK-ZHINXMIN-EXT: __riscv_zhinxmin 1000000{{$}} +// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: -march=rv32i_zibi0p1 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZIBI-EXT %s +// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: -march=rv64i_zibi0p1 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZIBI-EXT %s +// CHECK-ZIBI-EXT: __riscv_zibi + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32izic64b -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZIC64B-EXT %s diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index d6c7b46485ccf..cfe090eddfa09 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -327,6 +327,9 @@ The primary goal of experimental support is to assist in the process of ratifica ``experimental-zalasr`` LLVM implements the `0.0.5 draft specification `__. 
+``experimental-zibi`` + LLVM implements the `0.1 release specification `__. + ``experimental-zicfilp``, ``experimental-zicfiss`` LLVM implements the `1.0 release specification `__. diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 16174553ba7f2..3c3799321606a 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -124,6 +124,7 @@ Changes to the RISC-V Backend using `$x` with an architecture string suffix is not yet supported. * Ssctr and Smctr extensions are no longer experimental. * Add support for Zvfbfa (Additional BF16 vector compute support) +* Adds experimental support for the 'Zibi` (Branch with Immediate) extension. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index cd8392849ac40..2b5f18d611524 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -946,6 +946,11 @@ struct RISCVOperand final : public MCParsedAsmOperand { return isUImmPred([](int64_t Imm) { return 4 == Imm; }); } + bool isImm5Zibi() const { + return isUImmPred( + [](int64_t Imm) { return (Imm != 0 && isUInt<5>(Imm)) || Imm == -1; }); + } + bool isSImm5Plus1() const { return isSImmPred( [](int64_t Imm) { return Imm != INT64_MIN && isInt<5>(Imm - 1); }); @@ -1643,6 +1648,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "operand must be a valid system register " "name or an integer in the range"); } + case Match_InvalidImm5Zibi: + return generateImmOutOfRangeError( + Operands, ErrorInfo, -1, (1 << 5) - 1, + "immediate must be non-zero in the range"); case Match_InvalidVTypeI: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return generateVTypeError(ErrorLoc); diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp 
index 61b86abdc4ca9..fb5a35daaf58f 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -491,6 +491,14 @@ static DecodeStatus decodeUImmPlus1Operand(MCInst &Inst, uint32_t Imm, return MCDisassembler::Success; } +static DecodeStatus decodeImmZibiOperand(MCInst &Inst, uint32_t Imm, + int64_t Address, + const MCDisassembler *Decoder) { + assert(isUInt<5>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::createImm(Imm ? Imm : -1LL)); + return MCDisassembler::Success; +} + template static DecodeStatus decodeSImmOperand(MCInst &Inst, uint32_t Imm, int64_t Address, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index fcea23a5275c0..70b7c430c410e 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -341,6 +341,7 @@ enum OperandType : unsigned { OPERAND_UIMM64, OPERAND_THREE, OPERAND_FOUR, + OPERAND_IMM5_ZIBI, OPERAND_SIMM5, OPERAND_SIMM5_NONZERO, OPERAND_SIMM5_PLUS1, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 717fba68b48ed..6d587e6f167fc 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -97,6 +97,10 @@ class RISCVMCCodeEmitter : public MCCodeEmitter { SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + uint64_t getImmOpValueZibi(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getImmOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; @@ -559,6 +563,19 @@ RISCVMCCodeEmitter::getImmOpValueAsrN(const MCInst &MI, unsigned OpNo, return getImmOpValue(MI, OpNo, Fixups, STI); } +uint64_t +RISCVMCCodeEmitter::getImmOpValueZibi(const MCInst &MI, unsigned OpNo, 
+ SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isImm() && "Zibi operand must be an immediate"); + int64_t Res = MO.getImm(); + if (Res == -1) + return 0; + + return Res; +} + uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 063963d4ec36b..95703e33926c5 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -78,6 +78,12 @@ def FeatureStdExtE : RISCVExtension<2, 0, "Embedded Instruction Set with 16 GPRs">, RISCVExtensionBitmask<0, 4>; +def FeatureStdExtZibi + : RISCVExperimentalExtension<0, 1, "Branch with Immediate">; +def HasStdExtZibi : Predicate<"Subtarget->hasStdExtZibi()">, + AssemblerPredicate<(all_of FeatureStdExtZibi), + "'Zibi' (Branch with Immediate)">; + def FeatureStdExtZic64b : RISCVExtension<1, 0, "Cache Block Size Is 64 Bytes">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index c2667b0e7c9e4..2afd77a96373b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -498,6 +498,22 @@ class RVInstB funct3, RISCVOpcode opcode, dag outs, dag ins, let Inst{6-0} = opcode.Value; } +class RVInstBIMM funct3, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst { + bits<12> imm12; + bits<5> cimm; + bits<5> rs1; + let Inst{31} = imm12{11}; + let Inst{30-25} = imm12{9-4}; + let Inst{24-20} = cimm; + let Inst{19-15} = rs1; + let Inst{14-12} = funct3; + let Inst{11-8} = imm12{3-0}; + let Inst{7} = imm12{10}; + let Inst{6-0} = opcode.Value; +} + class RVInstU : RVInst { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 92552b36aa0b9..7cdfb1e0eba01 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -2329,6 +2329,7 @@ include "RISCVInstrInfoZimop.td" include "RISCVInstrInfoZicbo.td" include "RISCVInstrInfoZicond.td" include "RISCVInstrInfoZilsd.td" +include "RISCVInstrInfoZibi.td" // Scalar FP include "RISCVInstrInfoF.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td new file mode 100644 index 0000000000000..1570355e3da54 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZibi.td @@ -0,0 +1,44 @@ +//===-- RISCVInstrInfoZibi.td - 'Zibi' instructions --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file describes the RISC-V instructions for 'Zibi' (branch with imm). +/// +//===----------------------------------------------------------------------===// + +// A 5-bit unsigned immediate representing 1-31 and -1. 00000 represents -1. 
+def imm5_zibi : RISCVOp, ImmLeaf(Imm)) || Imm == -1; +}]> { + let ParserMatchClass = ImmAsmOperand<"", 5, "Zibi">; + let EncoderMethod = "getImmOpValueZibi"; + let DecoderMethod = "decodeImmZibiOperand"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return (Imm >= 1 && Imm <= 31) || Imm == -1; + }]; + let OperandType = "OPERAND_IMM5_ZIBI"; +} + +class Branch_imm funct3, string opcodestr> + : RVInstBIMM, + Sched<[WriteJmp, ReadJmp]> { + let isBranch = 1; + let isTerminator = 1; + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; +} + +let Predicates = [HasStdExtZibi] in { + def BEQI : Branch_imm<0b010, "beqi">; + def BNEI : Branch_imm<0b011, "bnei">; +} // Predicates = [HasStdExtZibi] diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index eacd5c9a88bba..154fb83172341 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -143,6 +143,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+supm %s -o - | FileCheck --check-prefix=RV32SUPM %s ; RUN: llc -mtriple=riscv32 -mattr=+smctr %s -o - | FileCheck --check-prefix=RV32SMCTR %s ; RUN: llc -mtriple=riscv32 -mattr=+ssctr %s -o - | FileCheck --check-prefix=RV32SSCTR %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zibi %s -o - | FileCheck --check-prefix=RV32ZIBI %s ; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefixes=CHECK,RV64M %s @@ -292,6 +293,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+sdext %s -o - | FileCheck --check-prefix=RV64SDEXT %s ; RUN: llc -mtriple=riscv64 -mattr=+sdtrig %s -o - | FileCheck --check-prefix=RV64SDTRIG %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-p %s -o - | FileCheck --check-prefix=RV64P %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zibi %s -o - | FileCheck --check-prefix=RV64ZIBI %s ; Tests for profile features. 
@@ -452,6 +454,7 @@ ; RV32SMCTR: .attribute 5, "rv32i2p1_smctr1p0_sscsrind1p0" ; RV32SSCTR: .attribute 5, "rv32i2p1_sscsrind1p0_ssctr1p0" ; RV32P: .attribute 5, "rv32i2p1_p0p15" +; RV32ZIBI: .attribute 5, "rv32i2p1_zibi0p1" ; RV64M: .attribute 5, "rv64i2p1_m2p0_zmmul1p0" ; RV64ZMMUL: .attribute 5, "rv64i2p1_zmmul1p0" @@ -599,6 +602,7 @@ ; RV64SDEXT: .attribute 5, "rv64i2p1_sdext1p0" ; RV64SDTRIG: .attribute 5, "rv64i2p1_sdtrig1p0" ; RV64P: .attribute 5, "rv64i2p1_p0p15" +; RV64ZIBI: .attribute 5, "rv64i2p1_zibi0p1" ; RVI20U32: .attribute 5, "rv32i2p1" ; RVI20U64: .attribute 5, "rv64i2p1" diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 01b8c0eaadb05..a3b56c6fd3d77 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -52,6 +52,7 @@ ; CHECK-NEXT: experimental-xsfmclic - 'XSfmclic' (SiFive CLIC Machine-mode CSRs). ; CHECK-NEXT: experimental-xsfsclic - 'XSfsclic' (SiFive CLIC Supervisor-mode CSRs). ; CHECK-NEXT: experimental-zalasr - 'Zalasr' (Load-Acquire and Store-Release Instructions). +; CHECK-NEXT: experimental-zibi - 'Zibi' (Branch with Immediate). ; CHECK-NEXT: experimental-zicfilp - 'Zicfilp' (Landing pad). ; CHECK-NEXT: experimental-zicfiss - 'Zicfiss' (Shadow stack). ; CHECK-NEXT: experimental-zvbc32e - 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements). 
diff --git a/llvm/test/MC/RISCV/zibi-invalid.s b/llvm/test/MC/RISCV/zibi-invalid.s new file mode 100644 index 0000000000000..50e5f0709fa6c --- /dev/null +++ b/llvm/test/MC/RISCV/zibi-invalid.s @@ -0,0 +1,34 @@ +# RUN: not llvm-mc -triple=riscv32 --mattr=+experimental-zibi %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: not llvm-mc -triple=riscv64 --mattr=+experimental-zibi %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +beqi a0, 0x0, 0x400 +# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31] +# CHECK-ERROR-LABEL: beqi a0, 0x0, 0x400 +beqi a0, 0x21, 0x400 +# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31] +# CHECK-ERROR-LABEL: beqi a0, 0x21, 0x400 +beqi a2, 0x10, -0x1f000 +# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094] +# CHECK-ERROR-LABEL: beqi a2, 0x10, -0x1f000 +beqi a2, 0x10, 0x1000 +# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094] +# CHECK-ERROR-LABEL: beqi a2, 0x10, 0x1000 +beqi a2, 0x10, 0x111 +# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094] +# CHECK-ERROR-LABEL: beqi a2, 0x10, 0x111 +bnei a0, 0x0, 0x400 +# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31] +# CHECK-ERROR-LABEL: bnei a0, 0x0, 0x400 +bnei a0, 0x21, 0x400 +# CHECK-ERROR: [[@LINE-1]]:10: error: immediate must be non-zero in the range [-1, 31] +# CHECK-ERROR-LABEL: bnei a0, 0x21, 0x400 +bnei a2, 0x10, -0x1f000 +# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094] +# CHECK-ERROR-LABEL: bnei a2, 0x10, -0x1f000 +bnei a2, 0x10, 0x1000 +# CHECK-ERROR: [[@LINE-1]]:16: error: immediate must be a multiple of 2 bytes in the range [-4096, 4094] +# CHECK-ERROR-LABEL: bnei a2, 0x10, 0x1000 +bnei a2, 0x10, 0x111 +# CHECK-ERROR: [[@LINE-1]]:16: error: 
immediate must be a multiple of 2 bytes in the range [-4096, 4094] +# CHECK-ERROR-LABEL: bnei a2, 0x10, 0x111 diff --git a/llvm/test/MC/RISCV/zibi-valid.s b/llvm/test/MC/RISCV/zibi-valid.s new file mode 100644 index 0000000000000..b062c4cf1efb3 --- /dev/null +++ b/llvm/test/MC/RISCV/zibi-valid.s @@ -0,0 +1,63 @@ +# RUN: llvm-mc -triple=riscv32 -show-encoding --mattr=+experimental-zibi %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-ASM +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-zibi %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-ASM +# RUN: not llvm-mc -triple=riscv32 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+experimental-zibi %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zibi --no-print-imm-hex - \ +# RUN: | FileCheck %s --check-prefix=CHECK-OBJ +# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+experimental-zibi %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-zibi %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +beqi a0, 1, 1024 +# CHECK-OBJ: beqi a0, 1, 0x400 +# CHECK-ASM: beqi a0, 1, 1024 +# CHECK-ENCODING: [0x63,0x20,0x15,0x40] +# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}} +# CHECK-UNKNOWN: 40152063 +beqi a5, -1, -1024 +# CHECK-OBJ: beqi a5, -1, 0xfffffc04 +# CHECK-ASM: beqi a5, -1, -1024 +# CHECK-ENCODING: [0xe3,0xa0,0x07,0xc0] +# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}} +# CHECK-UNKNOWN: c007a0e3 +beqi s0, 22, 0xffe +# CHECK-OBJ: beqi s0, 22, 0x1006 +# CHECK-ASM: beqi s0, 22, 4094 +# CHECK-ENCODING: [0xe3,0x2f,0x64,0x7f] +# CHECK-ERROR: instruction requires the following: 'Zibi' 
(Branch with Immediate){{$}} +# CHECK-UNKNOWN: 7f642fe3 +beqi s1, 11, -4096 +# CHECK-OBJ: beqi s1, 11, 0xfffff00c +# CHECK-ASM: beqi s1, 11, -4096 +# CHECK-ENCODING: [0x63,0xa0,0xb4,0x80] +# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}} +# CHECK-UNKNOWN: 80b4a063 +bnei a0, 1, 1024 +# CHECK-OBJ: bnei a0, 1, 0x410 +# CHECK-ASM: bnei a0, 1, 1024 +# CHECK-ENCODING: [0x63,0x30,0x15,0x40] +# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}} +# CHECK-UNKNOWN: 40153063 +bnei a5, -1, -1024 +# CHECK-OBJ: bnei a5, -1, 0xfffffc14 +# CHECK-ASM: bnei a5, -1, -1024 +# CHECK-ENCODING: [0xe3,0xb0,0x07,0xc0] +# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}} +# CHECK-UNKNOWN: c007b0e3 +bnei s0, 22, 0xffe +# CHECK-OBJ: bnei s0, 22, 0x1016 +# CHECK-ASM: bnei s0, 22, 4094 +# CHECK-ENCODING: [0xe3,0x3f,0x64,0x7f] +# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}} +# CHECK-UNKNOWN: 7f643fe3 +bnei s1, 11, -4096 +# CHECK-OBJ: bnei s1, 11, 0xfffff01c +# CHECK-ASM: bnei s1, 11, -4096 +# CHECK-ENCODING: [0x63,0xb0,0xb4,0x80] +# CHECK-ERROR: instruction requires the following: 'Zibi' (Branch with Immediate){{$}} +# CHECK-UNKNOWN: 80b4b063 diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index febd06ea51739..e953c0d11590b 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1184,6 +1184,7 @@ R"(All available -march extensions for RISC-V Experimental extensions p 0.15 + zibi 0.1 zicfilp 1.0 This is a long dummy description zicfiss 1.0 zalasr 0.1 From d7b7b9cd6d12a8cbc35fba4ecfd0a557011e9cdd Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 12 Sep 2025 08:41:00 +0100 Subject: [PATCH 084/734] Revert "[compiler-rt][test] Use packaging.version.Version to compare glibc versions" (#158230) Reverts 
llvm/llvm-project#142596 Got reports that some tests previously skipped are running again and failing. --- compiler-rt/test/lit.common.cfg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 7734491310edf..e2e815444dcf9 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -713,9 +713,9 @@ def add_glibc_versions(ver_string): if config.android: return - from packaging.version import Version + from distutils.version import LooseVersion - ver = Version(ver_string) + ver = LooseVersion(ver_string) any_glibc = False for required in [ "2.19", @@ -727,7 +727,7 @@ def add_glibc_versions(ver_string): "2.38", "2.40", ]: - if ver >= Version(required): + if ver >= LooseVersion(required): config.available_features.add("glibc-" + required) any_glibc = True if any_glibc: From 59102db770183835f549c58ad6954f39a407fe5e Mon Sep 17 00:00:00 2001 From: kper Date: Fri, 12 Sep 2025 09:43:47 +0200 Subject: [PATCH 085/734] [InstCombine] Added optimisation for trunc (Negated Pow2 >> x) to i1 (#157998) Follow up of https://github.com/llvm/llvm-project/pull/157030 ``` trunc ( lshr i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is negative power of 2 trunc ( ashr i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is negative power of 2 ``` General proof: lshr: https://alive2.llvm.org/ce/z/vVfaJc ashr: https://alive2.llvm.org/ce/z/8aAcgD --- .../InstCombine/InstCombineCasts.cpp | 14 +++- .../test/Transforms/InstCombine/trunc-lshr.ll | 74 +++++++++++++++++++ 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index ccf918f0b6dbe..9ca8194b44f8f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -977,8 +977,7 @@ Instruction 
*InstCombinerImpl::visitTrunc(TruncInst &Trunc) { // trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2 if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) { Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero()); - Value *Icmp = Builder.CreateICmpEQ(V1, Right); - return replaceInstUsesWith(Trunc, Icmp); + return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right); } // OP = { lshr, ashr } @@ -986,8 +985,15 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { // power of 2 if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) { Value *Right = ConstantInt::get(V1->getType(), C1->countr_one()); - Value *Icmp = Builder.CreateICmpULT(V1, Right); - return replaceInstUsesWith(Trunc, Icmp); + return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right); + } + + // OP = { lshr, ashr } + // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is + // negative power of 2 + if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) { + Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero()); + return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right); } return Changed ? 
&Trunc : nullptr; diff --git a/llvm/test/Transforms/InstCombine/trunc-lshr.ll b/llvm/test/Transforms/InstCombine/trunc-lshr.ll index c443b35cb1c1e..0e996e5d017fe 100644 --- a/llvm/test/Transforms/InstCombine/trunc-lshr.ll +++ b/llvm/test/Transforms/InstCombine/trunc-lshr.ll @@ -219,3 +219,77 @@ define i1 @negative_test_fold_ashr(i8 %x) { %trunc = trunc i8 %ashr to i1 ret i1 %trunc } + +define i1 @fold_lshr_negated_power_of_2(i8 %x) { +; CHECK-LABEL: define i1 @fold_lshr_negated_power_of_2( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3 +; CHECK-NEXT: ret i1 [[TRUNC]] +; + %lshr = lshr i8 -16, %x + %trunc = trunc i8 %lshr to i1 + ret i1 %trunc +} + +define i1 @fold_ashr_negated_power_of_2(i8 %x) { +; CHECK-LABEL: define i1 @fold_ashr_negated_power_of_2( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3 +; CHECK-NEXT: ret i1 [[TRUNC]] +; + %ashr = ashr i8 -16, %x + %trunc = trunc i8 %ashr to i1 + ret i1 %trunc +} + +define i1 @fold_lshr_negated_power_of_2_multi_use(i8 %x) { +; CHECK-LABEL: define i1 @fold_lshr_negated_power_of_2_multi_use( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[LSHR:%.*]] = lshr i8 -16, [[X]] +; CHECK-NEXT: call void @use(i8 [[LSHR]]) +; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3 +; CHECK-NEXT: ret i1 [[TRUNC]] +; + %lshr = lshr i8 -16, %x + call void @use(i8 %lshr) + %trunc = trunc i8 %lshr to i1 + ret i1 %trunc +} + +define i1 @fold_ashr_negated_power_of_2_multi_use(i8 %x) { +; CHECK-LABEL: define i1 @fold_ashr_negated_power_of_2_multi_use( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ASHR:%.*]] = ashr i8 -16, [[X]] +; CHECK-NEXT: call void @use(i8 [[ASHR]]) +; CHECK-NEXT: [[TRUNC:%.*]] = icmp ugt i8 [[X]], 3 +; CHECK-NEXT: ret i1 [[TRUNC]] +; + %ashr = ashr i8 -16, %x + call void @use(i8 %ashr) + %trunc = trunc i8 %ashr to i1 + ret i1 %trunc +} + +define i1 @negative_test_fold_lshr_negated_power_of_2(i8 %x) { +; CHECK-LABEL: define i1 
@negative_test_fold_lshr_negated_power_of_2( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[LSHR:%.*]] = lshr i8 -17, [[X]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[LSHR]] to i1 +; CHECK-NEXT: ret i1 [[TRUNC]] +; + %lshr = lshr i8 -17, %x + %trunc = trunc i8 %lshr to i1 + ret i1 %trunc +} + +define i1 @negative_test_fold_ashr_negated_power_of_2(i8 %x) { +; CHECK-LABEL: define i1 @negative_test_fold_ashr_negated_power_of_2( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ASHR1:%.*]] = lshr i8 -17, [[X]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[ASHR1]] to i1 +; CHECK-NEXT: ret i1 [[TRUNC]] +; + %ashr = ashr i8 -17, %x + %trunc = trunc i8 %ashr to i1 + ret i1 %trunc +} From 7982980e078481fb1c52360691206f10160b1e5a Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Fri, 12 Sep 2025 09:51:55 +0200 Subject: [PATCH 086/734] [AMDGPUPromoteAlloca][NFC] Avoid unnecessary APInt/int64_t conversions (#157864) Follow-up to #157682 --- .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 7dbe1235a98b5..ddabd25894414 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -406,6 +406,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca, SmallVector &NewInsts) { // TODO: Extracting a "multiple of X" from a GEP might be a useful generic // helper. 
+ LLVMContext &Ctx = GEP->getContext(); unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType()); SmallMapVector VarOffsets; APInt ConstOffset(BW, 0); @@ -438,27 +439,24 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca, assert(CurPtr == Alloca && "GEP not based on alloca"); - unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy); + int64_t VecElemSize = DL.getTypeAllocSize(VecElemTy); if (VarOffsets.size() > 1) return nullptr; APInt IndexQuot; - APInt Rem; - APInt::sdivrem(ConstOffset, APInt(ConstOffset.getBitWidth(), VecElemSize), - IndexQuot, Rem); - if (!Rem.isZero()) + int64_t Rem; + APInt::sdivrem(ConstOffset, VecElemSize, IndexQuot, Rem); + if (Rem != 0) return nullptr; if (VarOffsets.size() == 0) - return ConstantInt::get(GEP->getContext(), IndexQuot); + return ConstantInt::get(Ctx, IndexQuot); IRBuilder<> Builder(GEP); const auto &VarOffset = VarOffsets.front(); APInt OffsetQuot; - APInt::sdivrem(VarOffset.second, - APInt(VarOffset.second.getBitWidth(), VecElemSize), OffsetQuot, - Rem); - if (!Rem.isZero() || OffsetQuot.isZero()) + APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem); + if (Rem != 0 || OffsetQuot.isZero()) return nullptr; Value *Offset = VarOffset.first; @@ -468,7 +466,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca, if (!OffsetQuot.isOne()) { ConstantInt *ConstMul = - ConstantInt::get(OffsetType, OffsetQuot.getSExtValue()); + ConstantInt::get(Ctx, OffsetQuot.sext(OffsetType->getBitWidth())); Offset = Builder.CreateMul(Offset, ConstMul); if (Instruction *NewInst = dyn_cast(Offset)) NewInsts.push_back(NewInst); @@ -477,7 +475,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca, return Offset; ConstantInt *ConstIndex = - ConstantInt::get(OffsetType, IndexQuot.getSExtValue()); + ConstantInt::get(Ctx, IndexQuot.sext(OffsetType->getBitWidth())); Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex); if (Instruction *NewInst = dyn_cast(IndexAdd)) 
NewInsts.push_back(NewInst); From 5374f16270f02fdbedbbba96951a8b9ffd9c482f Mon Sep 17 00:00:00 2001 From: David Spickett  Date: Fri, 12 Sep 2025 09:08:10 +0100 Subject: [PATCH 087/734] [libcxx][ci][NFC] Remove commented install line and distutils reference (#158015) 76667c768e6403e71718340a946e6f2f356bf745 added distutils because "spawn" was used, which I then removed in 268a4b0a451432833d3b398c7182d133c865dff5. I removed it as part of removing all uses of distutils in llvm-project, tracked in #54337. Python has removed distutils in its latest versions. Distutils was not being installed in the docker image but just mentioned in a commented out line. I think this line was leftover from when it was reformatted into the multi-line command above. So I'm removing the whole line and relocating the comments a bit. --- libcxx/utils/ci/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index 79e11569c0d08..8e1c341c10b92 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -76,6 +76,9 @@ RUN sudo apt-get update \ && sudo apt-get install -y \ tzdata +# Install various tools used by the build or the test suite +# TODO add ninja-build once 1.11 is available in Ubuntu, also remove the manual +# installation below. RUN sudo apt-get update \ && sudo apt-get install -y \ bash \ @@ -108,9 +111,6 @@ RUN sudo apt-get update \ && sudo apt-get install -y \ xz-utils \ && sudo rm -rf /var/lib/apt/lists/* -# Install various tools used by the build or the test suite -#RUN apt-get update && apt-get install -y ninja-build python3 python3-distutils python3-psutil git gdb ccache -# TODO add ninja-build once 1.11 is available in Ubuntu, also remove the manual installation.
RUN < Date: Fri, 12 Sep 2025 04:17:40 -0400 Subject: [PATCH 088/734] [clang-format] Add an option to format numeric literal case (#151590) Some languages have the flexibility to use upper or lower case characters interchangeably in integer and float literal definitions. I'd like to be able to enforce a consistent case style in one of my projects, so I added this clang-format style option to control it. With this .clang-format configuration: ```yaml NumericLiteralCaseStyle: UpperCasePrefix: Never UpperCaseHexDigit: Always UpperCaseSuffix: Never ``` This line of code: ```C unsigned long long 0XdEaDbEeFUll; ``` gets reformatted into this line of code: ```C unsigned long long 0xDEADBEEFull; ``` ----- I'm new to this project, so please let me know if I missed something in the process. I modeled this PR from [IntegerLiteralSeparatorFixer](https://reviews.llvm.org/D140543) --- clang/docs/ClangFormatStyleOptions.rst | 107 ++++++ clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/Format/Format.h | 68 ++++ clang/lib/Format/CMakeLists.txt | 1 + clang/lib/Format/Format.cpp | 29 ++ clang/lib/Format/NumericLiteralCaseFixer.cpp | 177 +++++++++ clang/lib/Format/NumericLiteralCaseFixer.h | 32 ++ clang/unittests/Format/CMakeLists.txt | 1 + .../Format/NumericLiteralCaseTest.cpp | 346 ++++++++++++++++++ 9 files changed, 763 insertions(+) create mode 100644 clang/lib/Format/NumericLiteralCaseFixer.cpp create mode 100644 clang/lib/Format/NumericLiteralCaseFixer.h create mode 100644 clang/unittests/Format/NumericLiteralCaseTest.cpp diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 3ac9e3795cae7..6be4d512bda6a 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -5079,6 +5079,113 @@ the configuration (without a prefix: ``Auto``). For example: TESTSUITE +..
_NumericLiteralCase: + +**NumericLiteralCase** (``NumericLiteralCaseStyle``) :versionbadge:`clang-format 22` :ref:`¶ ` + Capitalization style for numeric literals. + + Nested configuration flags: + + Separate control for each numeric literal component. + + For example, the config below will leave exponent letters alone, reformat + hexadecimal digits in lowercase, reformat numeric literal prefixes in + uppercase, and reformat suffixes in lowercase. + + .. code-block:: c++ + + NumericLiteralCase: + ExponentLetter: Leave + HexDigit: Lower + Prefix: Upper + Suffix: Lower + + * ``NumericLiteralComponentStyle ExponentLetter`` + Format floating point exponent separator letter case. + + .. code-block:: c++ + + float a = 6.02e23 + 1.0E10; // Leave + float a = 6.02E23 + 1.0E10; // Upper + float a = 6.02e23 + 1.0e10; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. + + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + * ``NumericLiteralComponentStyle HexDigit`` + Format hexadecimal digit case. + + .. code-block:: c++ + + a = 0xaBcDeF; // Leave + a = 0xABCDEF; // Upper + a = 0xabcdef; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. + + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + * ``NumericLiteralComponentStyle Prefix`` + Format integer prefix case. + + .. code-block:: c++ + + a = 0XF0 | 0b1; // Leave + a = 0XF0 | 0B1; // Upper + a = 0xF0 | 0b1; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. 
+ + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + * ``NumericLiteralComponentStyle Suffix`` + Format suffix case. This option excludes case-sensitive reserved + suffixes, such as ``min`` in C++. + + .. code-block:: c++ + + a = 1uLL; // Leave + a = 1ULL; // Upper + a = 1ull; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. + + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + .. _ObjCBinPackProtocolList: **ObjCBinPackProtocolList** (``BinPackStyle``) :versionbadge:`clang-format 7` :ref:`¶ ` diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 060f3d982b850..4868714d898ec 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -468,6 +468,8 @@ AST Matchers clang-format ------------ - Add ``SpaceInEmptyBraces`` option and set it to ``Always`` for WebKit style. +- Add ``NumericLiteralCase`` option for enforcing character case in numeric + literals. libclang -------- diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 5dfdb23594610..03cff5f8cfb66 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -3558,6 +3558,73 @@ struct FormatStyle { /// \version 9 std::vector NamespaceMacros; + /// Control over each component in a numeric literal. + enum NumericLiteralComponentStyle : int8_t { + /// Leave this component of the literal as is. + NLCS_Leave, + /// Format this component with uppercase characters. + NLCS_Upper, + /// Format this component with lowercase characters. + NLCS_Lower, + }; + + /// Separate control for each numeric literal component. 
+ /// + /// For example, the config below will leave exponent letters alone, reformat + /// hexadecimal digits in lowercase, reformat numeric literal prefixes in + /// uppercase, and reformat suffixes in lowercase. + /// \code + /// NumericLiteralCase: + /// ExponentLetter: Leave + /// HexDigit: Lower + /// Prefix: Upper + /// Suffix: Lower + /// \endcode + struct NumericLiteralCaseStyle { + /// Format floating point exponent separator letter case. + /// \code + /// float a = 6.02e23 + 1.0E10; // Leave + /// float a = 6.02E23 + 1.0E10; // Upper + /// float a = 6.02e23 + 1.0e10; // Lower + /// \endcode + NumericLiteralComponentStyle ExponentLetter; + /// Format hexadecimal digit case. + /// \code + /// a = 0xaBcDeF; // Leave + /// a = 0xABCDEF; // Upper + /// a = 0xabcdef; // Lower + /// \endcode + NumericLiteralComponentStyle HexDigit; + /// Format integer prefix case. + /// \code + /// a = 0XF0 | 0b1; // Leave + /// a = 0XF0 | 0B1; // Upper + /// a = 0xF0 | 0b1; // Lower + /// \endcode + NumericLiteralComponentStyle Prefix; + /// Format suffix case. This option excludes case-sensitive reserved + /// suffixes, such as ``min`` in C++. + /// \code + /// a = 1uLL; // Leave + /// a = 1ULL; // Upper + /// a = 1ull; // Lower + /// \endcode + NumericLiteralComponentStyle Suffix; + + bool operator==(const NumericLiteralCaseStyle &R) const { + return ExponentLetter == R.ExponentLetter && HexDigit == R.HexDigit && + Prefix == R.Prefix && Suffix == R.Suffix; + } + + bool operator!=(const NumericLiteralCaseStyle &R) const { + return !(*this == R); + } + }; + + /// Capitalization style for numeric literals. + /// \version 22 + NumericLiteralCaseStyle NumericLiteralCase; + /// Controls bin-packing Objective-C protocol conformance list /// items into as few lines as possible when they go over ``ColumnLimit``. 
/// @@ -5469,6 +5536,7 @@ struct FormatStyle { MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep && NamespaceIndentation == R.NamespaceIndentation && NamespaceMacros == R.NamespaceMacros && + NumericLiteralCase == R.NumericLiteralCase && ObjCBinPackProtocolList == R.ObjCBinPackProtocolList && ObjCBlockIndentWidth == R.ObjCBlockIndentWidth && ObjCBreakBeforeNestedBlockParam == diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt index 24f435d2caee1..50c0683dc9b7f 100644 --- a/clang/lib/Format/CMakeLists.txt +++ b/clang/lib/Format/CMakeLists.txt @@ -13,6 +13,7 @@ add_clang_library(clangFormat MacroExpander.cpp MatchFilePath.cpp NamespaceEndCommentsFixer.cpp + NumericLiteralCaseFixer.cpp NumericLiteralInfo.cpp ObjCPropertyAttributeOrderFixer.cpp QualifierAlignmentFixer.cpp diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index e3b22cdabaccd..f095d2c18cfcf 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -16,6 +16,7 @@ #include "DefinitionBlockSeparator.h" #include "IntegerLiteralSeparatorFixer.h" #include "NamespaceEndCommentsFixer.h" +#include "NumericLiteralCaseFixer.h" #include "ObjCPropertyAttributeOrderFixer.h" #include "QualifierAlignmentFixer.h" #include "SortJavaScriptImports.h" @@ -472,6 +473,25 @@ struct ScalarEnumerationTraits { } }; +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &IO, + FormatStyle::NumericLiteralComponentStyle &Value) { + IO.enumCase(Value, "Leave", FormatStyle::NLCS_Leave); + IO.enumCase(Value, "Upper", FormatStyle::NLCS_Upper); + IO.enumCase(Value, "Lower", FormatStyle::NLCS_Lower); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, FormatStyle::NumericLiteralCaseStyle &Value) { + IO.mapOptional("ExponentLetter", Value.ExponentLetter); + IO.mapOptional("HexDigit", Value.HexDigit); + IO.mapOptional("Prefix", Value.Prefix); + IO.mapOptional("Suffix", Value.Suffix); + } +}; + template <> struct 
ScalarEnumerationTraits { static void enumeration(IO &IO, FormatStyle::OperandAlignmentStyle &Value) { IO.enumCase(Value, "DontAlign", FormatStyle::OAS_DontAlign); @@ -1121,6 +1141,7 @@ template <> struct MappingTraits { IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); IO.mapOptional("NamespaceMacros", Style.NamespaceMacros); + IO.mapOptional("NumericLiteralCase", Style.NumericLiteralCase); IO.mapOptional("ObjCBinPackProtocolList", Style.ObjCBinPackProtocolList); IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); IO.mapOptional("ObjCBreakBeforeNestedBlockParam", @@ -1653,6 +1674,10 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.LineEnding = FormatStyle::LE_DeriveLF; LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; + LLVMStyle.NumericLiteralCase = {/*ExponentLetter=*/FormatStyle::NLCS_Leave, + /*HexDigit=*/FormatStyle::NLCS_Leave, + /*Prefix=*/FormatStyle::NLCS_Leave, + /*Suffix=*/FormatStyle::NLCS_Leave}; LLVMStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Auto; LLVMStyle.ObjCBlockIndentWidth = 2; LLVMStyle.ObjCBreakBeforeNestedBlockParam = true; @@ -3890,6 +3915,10 @@ reformat(const FormatStyle &Style, StringRef Code, return IntegerLiteralSeparatorFixer().process(Env, Expanded); }); + Passes.emplace_back([&](const Environment &Env) { + return NumericLiteralCaseFixer().process(Env, Expanded); + }); + if (Style.isCpp()) { if (Style.QualifierAlignment != FormatStyle::QAS_Leave) addQualifierAlignmentFixerPasses(Expanded, Passes); diff --git a/clang/lib/Format/NumericLiteralCaseFixer.cpp b/clang/lib/Format/NumericLiteralCaseFixer.cpp new file mode 100644 index 0000000000000..b58b3c7ee0189 --- /dev/null +++ b/clang/lib/Format/NumericLiteralCaseFixer.cpp @@ -0,0 +1,177 @@ +//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements NumericLiteralCaseFixer that standardizes character +/// case within numeric literals. +/// +//===----------------------------------------------------------------------===// + +#include "NumericLiteralCaseFixer.h" +#include "NumericLiteralInfo.h" + +#include "llvm/ADT/StringExtras.h" + +#include + +namespace clang { +namespace format { + +static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) { + // Check if language is supported. + switch (Style.Language) { + case FormatStyle::LK_C: + case FormatStyle::LK_Cpp: + case FormatStyle::LK_ObjC: + case FormatStyle::LK_CSharp: + case FormatStyle::LK_Java: + case FormatStyle::LK_JavaScript: + break; + default: + return false; + } + + // Check if style options are set. + const auto &Option = Style.NumericLiteralCase; + const auto Leave = FormatStyle::NLCS_Leave; + return Option.Prefix != Leave || Option.HexDigit != Leave || + Option.ExponentLetter != Leave || Option.Suffix != Leave; +} + +static std::string +transformComponent(StringRef Component, + FormatStyle::NumericLiteralComponentStyle ConfigValue) { + switch (ConfigValue) { + case FormatStyle::NLCS_Upper: + return Component.upper(); + case FormatStyle::NLCS_Lower: + return Component.lower(); + default: + // Covers FormatStyle::NLCS_Leave. + return Component.str(); + } +} + +/// Test if Suffix matches a C++ literal reserved by the library. +/// Matches against all suffixes reserved in the C++23 standard. 
+static bool matchesReservedSuffix(StringRef Suffix) { + static constexpr std::array SortedReservedSuffixes = { + "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y", + }; + + // This can be static_assert when we have access to constexpr is_sorted in + // C++ 20. + assert(llvm::is_sorted(SortedReservedSuffixes) && + "Must be sorted as precondition for lower_bound()."); + + auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix); + if (entry == SortedReservedSuffixes.cend()) + return false; + return *entry == Suffix; +} + +static std::string format(StringRef NumericLiteral, const FormatStyle &Style) { + const char Separator = Style.isCpp() ? '\'' : '_'; + const NumericLiteralInfo Info(NumericLiteral, Separator); + const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos; + const bool HasExponent = Info.ExponentLetterPos != StringRef::npos; + const bool HasSuffix = Info.SuffixPos != StringRef::npos; + + std::string Formatted; + + if (HasBaseLetter) { + Formatted += + transformComponent(NumericLiteral.take_front(1 + Info.BaseLetterPos), + Style.NumericLiteralCase.Prefix); + } + // Reformat this slice as HexDigit whether or not the digit has hexadecimal + // characters because binary/decimal/octal digits are unchanged. + Formatted += transformComponent( + NumericLiteral.slice(HasBaseLetter ? 1 + Info.BaseLetterPos : 0, + HasExponent ? Info.ExponentLetterPos + : HasSuffix ? Info.SuffixPos + : NumericLiteral.size()), + Style.NumericLiteralCase.HexDigit); + + if (HasExponent) { + Formatted += transformComponent( + NumericLiteral.slice(Info.ExponentLetterPos, + HasSuffix ? Info.SuffixPos + : NumericLiteral.size()), + Style.NumericLiteralCase.ExponentLetter); + } + + if (HasSuffix) { + StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos); + if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') { + // In C++, it is idiomatic, but NOT standardized to define user-defined + // literals with a leading '_'. 
Omit user defined literals and standard + // reserved suffixes from transformation. + Formatted += Suffix.str(); + } else { + Formatted += transformComponent(Suffix, Style.NumericLiteralCase.Suffix); + } + } + + return Formatted; +} + +std::pair +NumericLiteralCaseFixer::process(const Environment &Env, + const FormatStyle &Style) { + if (!isNumericLiteralCaseFixerNeeded(Style)) + return {}; + + const auto &SourceMgr = Env.getSourceManager(); + AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); + + const auto ID = Env.getFileID(); + const auto LangOpts = getFormattingLangOpts(Style); + Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); + Lex.SetCommentRetentionState(true); + + Token Tok; + tooling::Replacements Result; + + for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { + // Skip tokens that are too small to contain a formattable literal. + // Size=2 is the smallest possible literal that could contain formattable + // components, for example "1u". + auto Length = Tok.getLength(); + if (Length < 2) + continue; + + // Service clang-format off/on comments. 
+ auto Location = Tok.getLocation(); + auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); + if (Tok.is(tok::comment)) { + if (isClangFormatOff(Text)) + Skip = true; + else if (isClangFormatOn(Text)) + Skip = false; + continue; + } + + if (Skip || Tok.isNot(tok::numeric_constant) || + !AffectedRangeMgr.affectsCharSourceRange( + CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { + continue; + } + + const auto Formatted = format(Text, Style); + if (Formatted != Text) { + cantFail(Result.add( + tooling::Replacement(SourceMgr, Location, Length, Formatted))); + } + } + + return {Result, 0}; +} + +} // namespace format +} // namespace clang diff --git a/clang/lib/Format/NumericLiteralCaseFixer.h b/clang/lib/Format/NumericLiteralCaseFixer.h new file mode 100644 index 0000000000000..ac3ac30d1d19a --- /dev/null +++ b/clang/lib/Format/NumericLiteralCaseFixer.h @@ -0,0 +1,32 @@ +//===--- NumericLiteralCaseFixer.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares NumericLiteralCaseFixer that standardizes character case +/// within numeric literals. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_NUMERICLITERALCASEFIXER_H +#define LLVM_CLANG_LIB_FORMAT_NUMERICLITERALCASEFIXER_H + +#include "TokenAnalyzer.h" + +namespace clang { +namespace format { + +class NumericLiteralCaseFixer { +public: + std::pair process(const Environment &Env, + const FormatStyle &Style); +}; + +} // end namespace format +} // end namespace clang + +#endif diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt index 5e5a7a0552993..03fff988d4663 100644 --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -28,6 +28,7 @@ add_distinct_clang_unittest(FormatTests MacroExpanderTest.cpp MatchFilePathTest.cpp NamespaceEndCommentsFixerTest.cpp + NumericLiteralCaseTest.cpp NumericLiteralInfoTest.cpp ObjCPropertyAttributeOrderFixerTest.cpp QualifierFixerTest.cpp diff --git a/clang/unittests/Format/NumericLiteralCaseTest.cpp b/clang/unittests/Format/NumericLiteralCaseTest.cpp new file mode 100644 index 0000000000000..ecd230d73f692 --- /dev/null +++ b/clang/unittests/Format/NumericLiteralCaseTest.cpp @@ -0,0 +1,346 @@ +//===- unittest/Format/NumericLiteralCaseTest.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatTestBase.h" + +#define DEBUG_TYPE "numeric-literal-case-test" + +namespace clang { +namespace format { +namespace test { +namespace { + +class NumericLiteralCaseTest : public FormatTestBase {}; + +TEST_F(NumericLiteralCaseTest, Prefix) { + constexpr StringRef Bin0("b = 0b0'10'010uL;"); + constexpr StringRef Bin1("b = 0B010'010Ul;"); + constexpr StringRef Hex0("b = 0xdead'BEEFuL;"); + constexpr StringRef Hex1("b = 0Xdead'BEEFUl;"); + verifyFormat(Bin0); + verifyFormat(Bin1); + verifyFormat(Hex0); + verifyFormat(Hex1); + + auto Style = getLLVMStyle(); + EXPECT_EQ(Style.NumericLiteralCase.Prefix, FormatStyle::NLCS_Leave); + EXPECT_EQ(Style.NumericLiteralCase.HexDigit, FormatStyle::NLCS_Leave); + EXPECT_EQ(Style.NumericLiteralCase.ExponentLetter, FormatStyle::NLCS_Leave); + EXPECT_EQ(Style.NumericLiteralCase.Suffix, FormatStyle::NLCS_Leave); + + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + verifyFormat("b = 0B0'10'010uL;", Bin0, Style); + verifyFormat(Bin1, Style); + verifyFormat("b = 0Xdead'BEEFuL;", Hex0, Style); + verifyFormat(Hex1, Style); + verifyFormat("i = 0XaBcD.a0Ebp123F;", Style); + verifyFormat("j = 0XaBcD.a0EbP123f;", Style); + + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + verifyFormat(Bin0, Style); + verifyFormat("b = 0b010'010Ul;", Bin1, Style); + verifyFormat(Hex0, Style); + verifyFormat("b = 0xdead'BEEFUl;", Hex1, Style); +} + +TEST_F(NumericLiteralCaseTest, HexDigit) { + constexpr StringRef A("a = 0xaBc0'123fuL;"); + constexpr StringRef B("b = 0XaBc0'123FUl;"); + constexpr StringRef C("c = 0xa'Bc.0p12'3f32;"); + constexpr StringRef D("d = 0xa'Bc.0P12'3F128;"); + constexpr StringRef E("e = 0b0011'00Ull;"); + constexpr StringRef F("f = 0B0100'000zu;"); + constexpr StringRef G("g = 0.123e-19f;"); + constexpr StringRef H("h = 0.12'3E-19F16;"); + constexpr 
StringRef I("i = 0x.0000aBcp12'3F128;"); + constexpr StringRef J("j = 0xaa1'fP12'3F128;"); + constexpr StringRef K("k = 0x0;"); + constexpr StringRef L("l = 0xA;"); + verifyFormat(A); + verifyFormat(B); + verifyFormat(C); + verifyFormat(D); + verifyFormat(E); + verifyFormat(F); + verifyFormat(G); + verifyFormat(H); + verifyFormat(I); + verifyFormat(J); + verifyFormat(K); + verifyFormat(L); + + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper; + verifyFormat("a = 0xABC0'123FuL;", A, Style); + verifyFormat("b = 0XABC0'123FUl;", B, Style); + verifyFormat("c = 0xA'BC.0p12'3f32;", C, Style); + verifyFormat("d = 0xA'BC.0P12'3F128;", D, Style); + verifyFormat(E, Style); + verifyFormat(F, Style); + verifyFormat(G, Style); + verifyFormat(H, Style); + verifyFormat("i = 0x.0000ABCp12'3F128;", I, Style); + verifyFormat("j = 0xAA1'FP12'3F128;", J, Style); + verifyFormat(K, Style); + verifyFormat(L, Style); + + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower; + verifyFormat("a = 0xabc0'123fuL;", A, Style); + verifyFormat("b = 0Xabc0'123fUl;", B, Style); + verifyFormat("c = 0xa'bc.0p12'3f32;", C, Style); + verifyFormat("d = 0xa'bc.0P12'3F128;", D, Style); + verifyFormat(E, Style); + verifyFormat(F, Style); + verifyFormat(G, Style); + verifyFormat(H, Style); + verifyFormat("i = 0x.0000abcp12'3F128;", I, Style); + verifyFormat("j = 0xaa1'fP12'3F128;", J, Style); + verifyFormat(K, Style); + verifyFormat("l = 0xa;", Style); +} + +TEST_F(NumericLiteralCaseTest, ExponentLetter) { + constexpr StringRef A("a = .0'01e-19f;"); + constexpr StringRef B("b = .00'1E2F;"); + constexpr StringRef C("c = 10'2.e99;"); + constexpr StringRef D("d = 123.456E-1;"); + constexpr StringRef E("e = 0x12abEe3.456p-10'0;"); + constexpr StringRef F("f = 0x.deEfP23;"); + constexpr StringRef G("g = 0xe0E1.p-1;"); + verifyFormat(A); + verifyFormat(B); + verifyFormat(C); + verifyFormat(D); + verifyFormat(E); + verifyFormat(F); + verifyFormat(G); + + auto Style 
= getLLVMStyle(); + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower; + verifyFormat(A, Style); + verifyFormat("b = .00'1e2F;", B, Style); + verifyFormat(C, Style); + verifyFormat("d = 123.456e-1;", D, Style); + verifyFormat(E, Style); + verifyFormat("f = 0x.deEfp23;", F, Style); + verifyFormat(G, Style); + + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper; + verifyFormat("a = .0'01E-19f;", A, Style); + verifyFormat(B, Style); + verifyFormat("c = 10'2.E99;", C, Style); + verifyFormat(D, Style); + verifyFormat("e = 0x12abEe3.456P-10'0;", E, Style); + verifyFormat(F, Style); + verifyFormat("g = 0xe0E1.P-1;", G, Style); +} + +TEST_F(NumericLiteralCaseTest, IntegerSuffix) { + constexpr StringRef A("a = 102u;"); + constexpr StringRef B("b = 0177U;"); + constexpr StringRef C("c = 0b101'111llU;"); + constexpr StringRef D("d = 0xdead'BeefuZ;"); + constexpr StringRef E("e = 3lU;"); + constexpr StringRef F("f = 1zu;"); + constexpr StringRef G("g = 0uLL;"); + constexpr StringRef H("h = 10'233'213'0101uLL;"); + verifyFormat(A); + verifyFormat(B); + verifyFormat(C); + verifyFormat(D); + verifyFormat(E); + verifyFormat(F); + verifyFormat(G); + verifyFormat(H); + + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + verifyFormat(A, Style); + verifyFormat("b = 0177u;", B, Style); + verifyFormat("c = 0b101'111llu;", C, Style); + verifyFormat("d = 0xdead'Beefuz;", D, Style); + verifyFormat("e = 3lu;", E, Style); + verifyFormat(F, Style); + verifyFormat("g = 0ull;", G, Style); + verifyFormat("h = 10'233'213'0101ull;", H, Style); + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + verifyFormat("a = 102U;", A, Style); + verifyFormat(B, Style); + verifyFormat("c = 0b101'111LLU;", C, Style); + verifyFormat("d = 0xdead'BeefUZ;", D, Style); + verifyFormat("e = 3LU;", E, Style); + verifyFormat("f = 1ZU;", F, Style); + verifyFormat("g = 0ULL;", G, Style); + verifyFormat("h = 10'233'213'0101ULL;", H, 
Style); +} + +TEST_F(NumericLiteralCaseTest, FloatingPointSuffix) { + auto Style = getLLVMStyle(); + // Floating point literals without suffixes. + constexpr std::array FloatingPointStatements = { + "a = 0.", "b = 1.0", "c = .123'45E-10", + "d = 12'3.0e1", "e = 0Xa0eE.P10", "f = 0xeE01.aFf3p6", + }; + + // All legal floating-point literal suffixes defined in the C++23 standard in + // lowercase. + constexpr std::array FloatingPointSuffixes = { + "f", "l", "f16", "f32", "f64", "f128", "bf16", + }; + + // Test all combinations of literals with suffixes. + for (const auto &Statement : FloatingPointStatements) { + for (const auto &Suffix : FloatingPointSuffixes) { + const auto LowerLine = Statement.str() + Suffix.str() + ";"; + const auto UpperLine = Statement.str() + Suffix.upper() + ";"; + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Leave; + verifyFormat(LowerLine, Style); + verifyFormat(UpperLine, Style); + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + verifyFormat(LowerLine, Style); + verifyFormat(LowerLine, UpperLine, Style); + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + verifyFormat(UpperLine, LowerLine, Style); + verifyFormat(UpperLine, Style); + } + } +} + +TEST_F(NumericLiteralCaseTest, CppStandardAndUserDefinedLiteralsAreUntouched) { + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + + // C++ user-defined suffixes begin with '_' or are reserved for the standard + // library. 
+ constexpr StringRef UDLiterals("a = 12.if;\n" + "b = -3i;\n" + "c = 100'01il;\n" + "d = 100'0.12il;\n" + "e = 12h;\n" + "f = 0XABE12h;\n" + "g = 0XFA03min;\n" + "h = 0X12B4Ds;\n" + "i = 20.13E-1ms;\n" + "j = 20.13E-1us;\n" + "k = 20.13E-1ns;\n" + "l = 20.13E-1y;\n" + "m = 20.13E-1d;\n" + "n = 20.13E-1d;\n" + "o = 1d;\n" + "p = 102_ffl_lzlz;\n" + "q = 10.2_l;\n" + "r = 0XABDE.0'1P-23_f;\n" + "s = 102_foo_bar;\n" + "t = 123.456_felfz_ballpen;\n" + "u = 0XBEAD1_spacebar;"); + + verifyFormat(UDLiterals, Style); + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + verifyFormat(UDLiterals, Style); +} + +TEST_F(NumericLiteralCaseTest, FixRanges) { + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + + constexpr StringRef CodeBlock("a = 0xFea3duLL;\n" + "b = 0X.aEbp-12f;\n" + "c = 0uLL;\n" + "// clang-format off\n" + "e = 0xBeAdu;\n" + "// clang-format on\n" + "g = 0xabCDu;\n" + "h = 0b010uL;\n" + "// clang-format off\n" + "i = 0B1010'000Zu;\n" + "// clang-format on\n" + "k = 0XaBuL;"); + + verifyFormat("a = 0xfea3dull;\n" + "b = 0x.aebp-12f;\n" + "c = 0ull;\n" + "// clang-format off\n" + "e = 0xBeAdu;\n" + "// clang-format on\n" + "g = 0xabcdu;\n" + "h = 0b010ul;\n" + "// clang-format off\n" + "i = 0B1010'000Zu;\n" + "// clang-format on\n" + "k = 0xabul;", + CodeBlock, Style); +} + +TEST_F(NumericLiteralCaseTest, UnderScoreSeparatorLanguages) { + auto Style = getLLVMStyle(); + + constexpr StringRef CodeBlock("a = 0xFea_3dl;\n" + "b = 0123_345;\n" + "c = 0b11____00lU;\n" + "d = 0XB_e_A_du;\n" + "e = 123_456.333__456e-10f;\n" + "f = .1_0E-10D;\n" + "g = 1_0.F;\n" + "h = 0B1_0;"); + auto TestUnderscore = [&](auto Language) { + Style.Language = Language; + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + 
Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + verifyFormat("a = 0xFEA_3DL;\n" + "b = 0123_345;\n" + "c = 0b11____00LU;\n" + "d = 0xB_E_A_DU;\n" + "e = 123_456.333__456e-10F;\n" + "f = .1_0e-10D;\n" + "g = 1_0.F;\n" + "h = 0b1_0;", + CodeBlock, Style); + + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + + verifyFormat("a = 0Xfea_3dl;\n" + "b = 0123_345;\n" + "c = 0B11____00lu;\n" + "d = 0Xb_e_a_du;\n" + "e = 123_456.333__456E-10f;\n" + "f = .1_0E-10d;\n" + "g = 1_0.f;\n" + "h = 0B1_0;", + CodeBlock, Style); + }; + + TestUnderscore(FormatStyle::LK_CSharp); + TestUnderscore(FormatStyle::LK_Java); + TestUnderscore(FormatStyle::LK_JavaScript); + + Style.Language = FormatStyle::LK_JavaScript; + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + verifyFormat("o = 0O0_10_010;", "o = 0o0_10_010;", Style); + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + verifyFormat("o = 0o0_10_010;", "o = 0O0_10_010;", Style); +} + +} // namespace +} // namespace test +} // namespace format +} // namespace clang From 5539daf8120cfe20c0c36d115319e14077e5fa79 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 12 Sep 2025 10:23:33 +0200 Subject: [PATCH 089/734] [ARM] Make test more robust (NFC) Make sure this doesn't optimize down to something simpler. 
--- llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll index c6ffb92d60d8d..8e570f0e91a08 100644 --- a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll +++ b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll @@ -25,7 +25,7 @@ define internal i32 @table_switch(i32 %x) "branch-target-enforcement" { ; CHECK-NEXT: movs r0, #3 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB0_5: @ %bb4 -; CHECK-NEXT: movs r0, #4 +; CHECK-NEXT: movs r0, #5 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB0_6: @ %sw.epilog ; CHECK-NEXT: movs r0, #0 @@ -51,7 +51,7 @@ sw.epilog: br label %return return: - %ret = phi i32 [ 0, %sw.epilog ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ] + %ret = phi i32 [ 0, %sw.epilog ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 5, %bb4 ] ret i32 %ret } From 4d24407e7d0ae66a8fef29f2100706d85706ae10 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 12 Sep 2025 10:38:37 +0200 Subject: [PATCH 090/734] [libc++][C++03] cherry-pick #101889 (#157881) --- libcxx/include/__cxx03/fstream | 43 +++++++++++-------- libcxx/include/__cxx03/ios | 3 +- libcxx/include/__cxx03/sstream | 32 +++++++------- libcxx/include/__cxx03/string | 4 +- .../fstreams/fstream.cons/default.pass.cpp | 2 - .../fstreams/fstream.cons/move.pass.cpp | 2 - .../fstreams/fstream.cons/pointer.pass.cpp | 2 - .../fstreams/fstream.cons/string.pass.cpp | 2 - .../fstreams/ifstream.cons/default.pass.cpp | 2 - .../fstreams/ifstream.cons/move.pass.cpp | 2 - .../fstreams/ifstream.cons/pointer.pass.cpp | 2 - .../fstreams/ifstream.cons/string.pass.cpp | 2 - .../fstreams/ofstream.cons/default.pass.cpp | 2 - .../fstreams/ofstream.cons/move.pass.cpp | 2 - .../fstreams/ofstream.cons/pointer.pass.cpp | 2 - .../fstreams/ofstream.cons/string.pass.cpp | 2 - .../ios/basic.ios.members/copyfmt.pass.cpp | 2 - .../istringstream.cons/default.pass.cpp | 2 - 
.../istringstream.cons/move.pass.cpp | 2 - .../istringstream.cons/string.pass.cpp | 2 - .../ostringstream.cons/default.pass.cpp | 2 - .../ostringstream.cons/move.pass.cpp | 2 - .../ostringstream.cons/string.pass.cpp | 2 - .../stringstream.cons/default.pass.cpp | 2 - .../stringstream.cons/move.pass.cpp | 2 - .../stringstream.cons/string.pass.cpp | 2 - 26 files changed, 46 insertions(+), 80 deletions(-) diff --git a/libcxx/include/__cxx03/fstream b/libcxx/include/__cxx03/fstream index 44bdabc4602b5..65c2c3e975032 100644 --- a/libcxx/include/__cxx03/fstream +++ b/libcxx/include/__cxx03/fstream @@ -191,6 +191,7 @@ typedef basic_fstream wfstream; #include <__cxx03/__config> #include <__cxx03/__fwd/fstream.h> #include <__cxx03/__locale> +#include <__cxx03/__memory/addressof.h> #include <__cxx03/__type_traits/enable_if.h> #include <__cxx03/__type_traits/is_same.h> #include <__cxx03/__utility/move.h> @@ -1062,11 +1063,12 @@ private: }; template -inline basic_ifstream<_CharT, _Traits>::basic_ifstream() : basic_istream(&__sb_) {} +inline basic_ifstream<_CharT, _Traits>::basic_ifstream() + : basic_istream(std::addressof(__sb_)) {} template inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const char* __s, ios_base::openmode __mode) - : basic_istream(&__sb_) { + : basic_istream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode | ios_base::in) == nullptr) this->setstate(ios_base::failbit); } @@ -1074,15 +1076,16 @@ inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const char* __s, ios_base # ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR template inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const wchar_t* __s, ios_base::openmode __mode) - : basic_istream(&__sb_) { + : basic_istream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode | ios_base::in) == nullptr) this->setstate(ios_base::failbit); } # endif +// extension template inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const string& __s, ios_base::openmode __mode) - : basic_istream(&__sb_) { + : 
basic_istream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode | ios_base::in) == nullptr) this->setstate(ios_base::failbit); } @@ -1090,7 +1093,7 @@ inline basic_ifstream<_CharT, _Traits>::basic_ifstream(const string& __s, ios_ba template inline basic_ifstream<_CharT, _Traits>::basic_ifstream(basic_ifstream&& __rhs) : basic_istream(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) { - this->set_rdbuf(&__sb_); + this->set_rdbuf(std::addressof(__sb_)); } template @@ -1113,7 +1116,7 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(basic_ifstream<_CharT, _Traits>& __x, bas template inline basic_filebuf<_CharT, _Traits>* basic_ifstream<_CharT, _Traits>::rdbuf() const { - return const_cast*>(&__sb_); + return const_cast*>(std::addressof(__sb_)); } template @@ -1199,11 +1202,12 @@ private: }; template -inline basic_ofstream<_CharT, _Traits>::basic_ofstream() : basic_ostream(&__sb_) {} +inline basic_ofstream<_CharT, _Traits>::basic_ofstream() + : basic_ostream(std::addressof(__sb_)) {} template inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const char* __s, ios_base::openmode __mode) - : basic_ostream(&__sb_) { + : basic_ostream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode | ios_base::out) == nullptr) this->setstate(ios_base::failbit); } @@ -1211,15 +1215,16 @@ inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const char* __s, ios_base # ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR template inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const wchar_t* __s, ios_base::openmode __mode) - : basic_ostream(&__sb_) { + : basic_ostream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode | ios_base::out) == nullptr) this->setstate(ios_base::failbit); } # endif +// extension template inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const string& __s, ios_base::openmode __mode) - : basic_ostream(&__sb_) { + : basic_ostream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode | ios_base::out) == nullptr) this->setstate(ios_base::failbit); } @@ -1227,7 +1232,7 @@ 
inline basic_ofstream<_CharT, _Traits>::basic_ofstream(const string& __s, ios_ba template inline basic_ofstream<_CharT, _Traits>::basic_ofstream(basic_ofstream&& __rhs) : basic_ostream(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) { - this->set_rdbuf(&__sb_); + this->set_rdbuf(std::addressof(__sb_)); } template @@ -1250,7 +1255,7 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(basic_ofstream<_CharT, _Traits>& __x, bas template inline basic_filebuf<_CharT, _Traits>* basic_ofstream<_CharT, _Traits>::rdbuf() const { - return const_cast*>(&__sb_); + return const_cast*>(std::addressof(__sb_)); } template @@ -1340,11 +1345,12 @@ private: }; template -inline basic_fstream<_CharT, _Traits>::basic_fstream() : basic_iostream(&__sb_) {} +inline basic_fstream<_CharT, _Traits>::basic_fstream() + : basic_iostream(std::addressof(__sb_)) {} template inline basic_fstream<_CharT, _Traits>::basic_fstream(const char* __s, ios_base::openmode __mode) - : basic_iostream(&__sb_) { + : basic_iostream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode) == nullptr) this->setstate(ios_base::failbit); } @@ -1352,7 +1358,7 @@ inline basic_fstream<_CharT, _Traits>::basic_fstream(const char* __s, ios_base:: # ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR template inline basic_fstream<_CharT, _Traits>::basic_fstream(const wchar_t* __s, ios_base::openmode __mode) - : basic_iostream(&__sb_) { + : basic_iostream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode) == nullptr) this->setstate(ios_base::failbit); } @@ -1360,15 +1366,16 @@ inline basic_fstream<_CharT, _Traits>::basic_fstream(const wchar_t* __s, ios_bas template inline basic_fstream<_CharT, _Traits>::basic_fstream(const string& __s, ios_base::openmode __mode) - : basic_iostream(&__sb_) { + : basic_iostream(std::addressof(__sb_)) { if (__sb_.open(__s, __mode) == nullptr) this->setstate(ios_base::failbit); } +// extension template inline basic_fstream<_CharT, _Traits>::basic_fstream(basic_fstream&& __rhs) : basic_iostream(std::move(__rhs)), 
__sb_(std::move(__rhs.__sb_)) { - this->set_rdbuf(&__sb_); + this->set_rdbuf(std::addressof(__sb_)); } template @@ -1391,7 +1398,7 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(basic_fstream<_CharT, _Traits>& __x, basi template inline basic_filebuf<_CharT, _Traits>* basic_fstream<_CharT, _Traits>::rdbuf() const { - return const_cast*>(&__sb_); + return const_cast*>(std::addressof(__sb_)); } template diff --git a/libcxx/include/__cxx03/ios b/libcxx/include/__cxx03/ios index 7c522909e6428..aa03ce348b624 100644 --- a/libcxx/include/__cxx03/ios +++ b/libcxx/include/__cxx03/ios @@ -218,6 +218,7 @@ storage-class-specifier const error_category& iostream_category() noexcept; # include <__cxx03/__fwd/ios.h> # include <__cxx03/__ios/fpos.h> # include <__cxx03/__locale> +# include <__cxx03/__memory/addressof.h> # include <__cxx03/__system_error/error_category.h> # include <__cxx03/__system_error/error_code.h> # include <__cxx03/__system_error/error_condition.h> @@ -696,7 +697,7 @@ inline _LIBCPP_HIDE_FROM_ABI _CharT basic_ios<_CharT, _Traits>::fill(char_type _ template basic_ios<_CharT, _Traits>& basic_ios<_CharT, _Traits>::copyfmt(const basic_ios& __rhs) { - if (this != &__rhs) { + if (this != std::addressof(__rhs)) { __call_callbacks(erase_event); ios_base::copyfmt(__rhs); __tie_ = __rhs.__tie_; diff --git a/libcxx/include/__cxx03/sstream b/libcxx/include/__cxx03/sstream index de56cd99553e2..44c2423a6e1fa 100644 --- a/libcxx/include/__cxx03/sstream +++ b/libcxx/include/__cxx03/sstream @@ -713,18 +713,19 @@ private: public: // [istringstream.cons] Constructors: - _LIBCPP_HIDE_FROM_ABI basic_istringstream() : basic_istream<_CharT, _Traits>(&__sb_), __sb_(ios_base::in) {} + _LIBCPP_HIDE_FROM_ABI basic_istringstream() + : basic_istream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(ios_base::in) {} _LIBCPP_HIDE_FROM_ABI explicit basic_istringstream(ios_base::openmode __wch) - : basic_istream<_CharT, _Traits>(&__sb_), __sb_(__wch | ios_base::in) {} + : basic_istream<_CharT, 
_Traits>(std::addressof(__sb_)), __sb_(__wch | ios_base::in) {} _LIBCPP_HIDE_FROM_ABI explicit basic_istringstream(const string_type& __s, ios_base::openmode __wch = ios_base::in) - : basic_istream<_CharT, _Traits>(&__sb_), __sb_(__s, __wch | ios_base::in) {} + : basic_istream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__s, __wch | ios_base::in) {} basic_istringstream(const basic_istringstream&) = delete; _LIBCPP_HIDE_FROM_ABI basic_istringstream(basic_istringstream&& __rhs) : basic_istream<_CharT, _Traits>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) { - basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); + basic_istream<_CharT, _Traits>::set_rdbuf(std::addressof(__sb_)); } // [istringstream.assign] Assign and swap: @@ -741,7 +742,7 @@ public: // [istringstream.members] Member functions: _LIBCPP_HIDE_FROM_ABI basic_stringbuf* rdbuf() const { - return const_cast*>(&__sb_); + return const_cast*>(std::addressof(__sb_)); } _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } @@ -774,18 +775,19 @@ private: public: // [ostringstream.cons] Constructors: - _LIBCPP_HIDE_FROM_ABI basic_ostringstream() : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(ios_base::out) {} + _LIBCPP_HIDE_FROM_ABI basic_ostringstream() + : basic_ostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(ios_base::out) {} _LIBCPP_HIDE_FROM_ABI explicit basic_ostringstream(ios_base::openmode __wch) - : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(__wch | ios_base::out) {} + : basic_ostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__wch | ios_base::out) {} _LIBCPP_HIDE_FROM_ABI explicit basic_ostringstream(const string_type& __s, ios_base::openmode __wch = ios_base::out) - : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(__s, __wch | ios_base::out) {} + : basic_ostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__s, __wch | ios_base::out) {} basic_ostringstream(const basic_ostringstream&) = delete; _LIBCPP_HIDE_FROM_ABI basic_ostringstream(basic_ostringstream&& __rhs) : 
basic_ostream<_CharT, _Traits>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) { - basic_ostream<_CharT, _Traits>::set_rdbuf(&__sb_); + basic_ostream<_CharT, _Traits>::set_rdbuf(std::addressof(__sb_)); } // [ostringstream.assign] Assign and swap: @@ -803,7 +805,7 @@ public: // [ostringstream.members] Member functions: _LIBCPP_HIDE_FROM_ABI basic_stringbuf* rdbuf() const { - return const_cast*>(&__sb_); + return const_cast*>(std::addressof(__sb_)); } _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } @@ -836,19 +838,19 @@ private: public: // [stringstream.cons] constructors _LIBCPP_HIDE_FROM_ABI basic_stringstream() - : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(ios_base::in | ios_base::out) {} + : basic_iostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(ios_base::in | ios_base::out) {} _LIBCPP_HIDE_FROM_ABI explicit basic_stringstream(ios_base::openmode __wch) - : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(__wch) {} + : basic_iostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__wch) {} _LIBCPP_HIDE_FROM_ABI explicit basic_stringstream(const string_type& __s, ios_base::openmode __wch = ios_base::in | ios_base::out) - : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(__s, __wch) {} + : basic_iostream<_CharT, _Traits>(std::addressof(__sb_)), __sb_(__s, __wch) {} basic_stringstream(const basic_stringstream&) = delete; _LIBCPP_HIDE_FROM_ABI basic_stringstream(basic_stringstream&& __rhs) : basic_iostream<_CharT, _Traits>(std::move(__rhs)), __sb_(std::move(__rhs.__sb_)) { - basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); + basic_istream<_CharT, _Traits>::set_rdbuf(std::addressof(__sb_)); } // [stringstream.assign] Assign and swap: @@ -865,7 +867,7 @@ public: // [stringstream.members] Member functions: _LIBCPP_HIDE_FROM_ABI basic_stringbuf* rdbuf() const { - return const_cast*>(&__sb_); + return const_cast*>(std::addressof(__sb_)); } _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } diff --git 
a/libcxx/include/__cxx03/string b/libcxx/include/__cxx03/string index 178140486105e..6c43fca08f109 100644 --- a/libcxx/include/__cxx03/string +++ b/libcxx/include/__cxx03/string @@ -2866,13 +2866,13 @@ inline void basic_string<_CharT, _Traits, _Allocator>::swap(basic_string& __str) "swapping non-equal allocators"); if (!__is_long()) __annotate_delete(); - if (this != &__str && !__str.__is_long()) + if (this != std::addressof(__str) && !__str.__is_long()) __str.__annotate_delete(); std::swap(__r_.first(), __str.__r_.first()); std::__swap_allocator(__alloc(), __str.__alloc()); if (!__is_long()) __annotate_new(__get_short_size()); - if (this != &__str && !__str.__is_long()) + if (this != std::addressof(__str) && !__str.__is_long()) __str.__annotate_new(__str.__get_short_size()); } diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp index d2efaf1561631..d15276b440157 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp @@ -13,8 +13,6 @@ // basic_fstream(); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp index 153487898e157..95a04bdfccdbc 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp @@ -13,8 +13,6 @@ // basic_fstream(basic_fstream&& rhs); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp index 
ca226242773ad..2e0ebcd684d79 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp @@ -18,8 +18,6 @@ // XFAIL: LIBCXX-AIX-FIXME -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp index 28cefc77d6a90..ca0921a00b9b6 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp @@ -13,8 +13,6 @@ // explicit basic_fstream(const string& s, ios_base::openmode mode = ios_base::in|ios_base::out); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp index 256380d2c164a..70d1efca20c65 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp @@ -13,8 +13,6 @@ // basic_ifstream(); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp index c8be388f40698..81ec800954cc2 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp @@ -8,8 +8,6 @@ // FILE_DEPENDENCIES: test.dat -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // // template > diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp 
b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp index 711ab2a74b516..6bbe6f1ff7754 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp @@ -8,8 +8,6 @@ // FILE_DEPENDENCIES: test.dat -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - // // template > diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp index d4bbb3c0cabfc..e1a9b53da1348 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp @@ -15,8 +15,6 @@ // explicit basic_ifstream(const string& s, ios_base::openmode mode = ios_base::in); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp index 4cda1db438342..a7b0918f79365 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp @@ -13,8 +13,6 @@ // basic_ofstream(); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp index 501a4c90ca3fe..ec02fa2621c19 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp @@ -13,8 +13,6 @@ // basic_ofstream(basic_ofstream&& rhs); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git 
a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp index 1b5a55df73717..fbb03f1e85841 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp @@ -18,8 +18,6 @@ // XFAIL: LIBCXX-AIX-FIXME -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include #include diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp index bb18c88bd326e..33a7e9b2b6f50 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp @@ -13,8 +13,6 @@ // explicit basic_ofstream(const string& s, ios_base::openmode mode = ios_base::out); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include #include diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp index 768922192038b..d78f7df8f6b5e 100644 --- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp +++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/copyfmt.pass.cpp @@ -15,8 +15,6 @@ // basic_ios& copyfmt(const basic_ios& rhs); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include #include diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp index 8cd23d45598b8..8c73df42ae4be 100644 --- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp +++ 
b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp @@ -15,8 +15,6 @@ // basic_istringstream() : basic_istringstream(ios_base::in) {} // C++20 // explicit basic_istringstream(ios_base::openmode which); // C++20 -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp index 1af3304d08971..00ac7cc6414e9 100644 --- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp @@ -13,8 +13,6 @@ // basic_istringstream(basic_istringstream&& rhs); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp index 7755dd926c2f6..4a5965e7e96e9 100644 --- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp @@ -14,8 +14,6 @@ // explicit basic_istringstream(const basic_string& str, // ios_base::openmode which = ios_base::in); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp index eb248a7801a3c..a6b98a4e36293 100644 --- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp @@ -15,8 +15,6 @@ // basic_ostringstream() : 
basic_ostringstream(ios_base::out) {} // C++20 // explicit basic_ostringstream(ios_base::openmode which); // C++20 -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp index 62ece7b63b31c..596a3e7d53584 100644 --- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp @@ -13,8 +13,6 @@ // basic_ostringstream(basic_ostringstream&& rhs); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp index bbec8f79a1862..9e9405ad49217 100644 --- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp @@ -14,8 +14,6 @@ // explicit basic_ostringstream(const basic_string& str, // ios_base::openmode which = ios_base::in); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp index 0e535814ae54e..4f9e7e026c50f 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/default.pass.cpp @@ -15,8 +15,6 @@ // basic_stringstream() : basic_stringstream(ios_base::out | ios_base::in) {} // C++20 // explicit basic_stringstream(ios_base::openmode which); // C++20 -// XFAIL: FROZEN-CXX03-HEADERS-FIXME 
- #include #include diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp index e905f5f7c686a..0702d9a278d3c 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/move.pass.cpp @@ -13,8 +13,6 @@ // basic_stringstream(basic_stringstream&& rhs); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp index d4fe18afbd28f..08880878361da 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.cons/string.pass.cpp @@ -14,8 +14,6 @@ // explicit basic_stringstream(const basic_string& str, // ios_base::openmode which = ios_base::out|ios_base::in); -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - #include #include From 1b05212acc1964837135930a129ee26e1a392278 Mon Sep 17 00:00:00 2001 From: "Henrik G. 
Olsson" Date: Fri, 12 Sep 2025 01:47:57 -0700 Subject: [PATCH 091/734] [Utils] fix diff_test_updater on Windows (#158235) --- llvm/utils/lit/lit/DiffUpdater.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/llvm/utils/lit/lit/DiffUpdater.py b/llvm/utils/lit/lit/DiffUpdater.py index fefcdcc99f3f2..a29c46fb8508f 100644 --- a/llvm/utils/lit/lit/DiffUpdater.py +++ b/llvm/utils/lit/lit/DiffUpdater.py @@ -62,17 +62,19 @@ def __str__(self): @staticmethod def get_target_dir(commands, test_path): + # posix=True breaks Windows paths because \ is treated as an escaping character for cmd in commands: - split = shlex.split(cmd) + split = shlex.split(cmd, posix=False) if "split-file" not in split: continue start_idx = split.index("split-file") split = split[start_idx:] if len(split) < 3: continue - if split[1].strip() != test_path: + p = unquote(split[1].strip()) + if not test_path.samefile(p): continue - return split[2].strip() + return unquote(split[2].strip()) return None @staticmethod @@ -104,6 +106,12 @@ def _get_split_line_path(l): return l.rstrip() +def unquote(s): + if len(s) > 1 and s[0] == s[-1] and (s[0] == '"' or s[0] == "'"): + return s[1:-1] + return s + + def get_source_and_target(a, b, test_path, commands): """ Try to figure out which file is the test output and which is the reference. @@ -145,7 +153,7 @@ def diff_test_updater(result, test, commands): [cmd, a, b] = args if cmd != "diff": return None - res = get_source_and_target(a, b, test.getFilePath(), commands) + res = get_source_and_target(a, b, pathlib.Path(test.getFilePath()), commands) if not res: return f"update-diff-test: could not deduce source and target from {a} and {b}" source, target = res From fae68b6c77058dd10d7c6780181ff312e46f0689 Mon Sep 17 00:00:00 2001 From: Georgiy Samoylov Date: Fri, 12 Sep 2025 11:49:33 +0300 Subject: [PATCH 092/734] [RISCV] Enabled debug entry support by default (#157703) This patch enables support for debug entry values. 
This improves quality of debug info for RISC-V --- clang/lib/Frontend/CompilerInvocation.cpp | 7 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 12 ++-- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 3 + .../RISCV/dw_op_entry_value_32bit.ll | 65 +++++++++++++++++++ .../RISCV/dw_op_entry_value_64bit.ll | 65 +++++++++++++++++++ 5 files changed, 143 insertions(+), 9 deletions(-) create mode 100644 llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll create mode 100644 llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 931766db4b0c8..761310813f787 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1975,9 +1975,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, } const llvm::Triple::ArchType DebugEntryValueArchs[] = { - llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, - llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, - llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el}; + llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, + llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, + llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el, + llvm::Triple::riscv32, llvm::Triple::riscv64}; if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() && llvm::is_contained(DebugEntryValueArchs, T.getArch())) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4f137756d2f48..1d01de336b787 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -23258,6 +23258,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. 
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EmitCallSiteInfo) + CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i); } else { assert(VA.isMemLoc() && "Argument not register or memory"); assert(!IsTailCall && "Tail call not allowed if stack is used " @@ -23359,9 +23363,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, if (CLI.CFIType) Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); - if (MF.getTarget().Options.EmitCallGraphSection && CB && - CB->isIndirectCall()) - DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); + DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); return Ret; } @@ -23370,10 +23372,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, if (CLI.CFIType) Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); - if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall()) - DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); - DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); + DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 0668b3896fa2d..f81b1e1260ee3 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -163,6 +163,9 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, setMachineOutliner(true); setSupportsDefaultOutlining(true); + // RISC-V supports the debug entry values. 
+ setSupportsDebugEntryValues(true); + if (TT.isOSFuchsia() && !TT.isArch64Bit()) report_fatal_error("Fuchsia is only supported for 64-bit"); diff --git a/llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll new file mode 100644 index 0000000000000..cb7c61df77646 --- /dev/null +++ b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_32bit.ll @@ -0,0 +1,65 @@ +;; Test RISC-V 32 bit: +; RUN: llc -emit-call-site-info -stop-after=livedebugvalues -mtriple=riscv32-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK32 + +;; Built from source: +;; extern long fn1(long,long,long); +;; long fn2(long a, long b, long c) { +;; long local = fn1(a+b, c, b+10); +;; if (local > 10) +;; return local + 10; +;; return b; +;; } +;; Using command: +;; clang -g -O2 -target riscv32-linux-gnu m.c -c -S -emit-llvm +;; Confirm that info from callSites attribute is used as entry_value in DIExpression. + +;; Test riscv32: +; CHECK32: $x10 = nsw ADD $x11, killed renamable $x10 +; CHECK32-NEXT: DBG_VALUE $x10, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1) + +; ModuleID = 'm.c' +source_filename = "m.c" +target datalayout = "e-m:e-p:32:32-i64:64-n32-S128" +target triple = "riscv32-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @fn2(i32 noundef %a, i32 noundef %b, i32 noundef %c) !dbg !14 { +entry: + #dbg_value(i32 %a, !20, !DIExpression(), !23) + #dbg_value(i32 %b, !21, !DIExpression(), !23) + #dbg_value(i32 %c, !22, !DIExpression(), !23) + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %b, 10 + %call = tail call i32 @fn1(i32 noundef %add, i32 noundef %c, i32 noundef %add1) + #dbg_value(i32 %call, !22, !DIExpression(), !23) + %cmp = icmp sgt i32 %call, 10 + %add2 = add nuw nsw i32 %call, 10 + %retval.0 = select i1 %cmp, i32 %add2, i32 %b + ret i32 %retval.0, !dbg !29 +} + +declare !dbg !30 i32 @fn1(i32 noundef, i32 noundef, i32 noundef) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4} +!llvm.ident = 
!{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "m.c", directory: ".") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{!"clang"} +!14 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 2, type: !15, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!15 = !DISubroutineType(types: !16) +!16 = !{!17, !17, !17, !17} +!17 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!18 = !{!19, !20, !21, !22} +!19 = !DILocalVariable(name: "a", arg: 1, scope: !14, file: !1, line: 2, type: !17) +!20 = !DILocalVariable(name: "b", arg: 2, scope: !14, file: !1, line: 2, type: !17) +!21 = !DILocalVariable(name: "c", arg: 3, scope: !14, file: !1, line: 2, type: !17) +!22 = !DILocalVariable(name: "local", scope: !14, file: !1, line: 3, type: !17) +!23 = !DILocation(line: 0, scope: !14) +!29 = !DILocation(line: 7, column: 1, scope: !14) +!30 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !15, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) + diff --git a/llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll new file mode 100644 index 0000000000000..cd6a7650780e6 --- /dev/null +++ b/llvm/test/DebugInfo/RISCV/dw_op_entry_value_64bit.ll @@ -0,0 +1,65 @@ +;; Test RISC-V 64 bit: +; RUN: llc -emit-call-site-info -stop-after=livedebugvalues -mtriple=riscv64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK64 + +;; Built from source: +;; extern long fn1(long,long,long); +;; long fn2(long a, long b, long c) { +;; long local = fn1(a+b, c, b+10); +;; if (local > 10) +;; return local + 10; +;; return b; +;; } +;; Using command: +;; clang -g -O2 
-target riscv64-linux-gnu m.c -c -S -emit-llvm +;; Confirm that info from callSites attribute is used as entry_value in DIExpression. + +;; Test riscv64: +; CHECK64: $x10 = nsw ADD $x11, killed renamable $x10 +; CHECK64-NEXT: DBG_VALUE $x10, $noreg, !{{.*}}, !DIExpression(DW_OP_LLVM_entry_value, 1) + +; ModuleID = 'm.c' +source_filename = "m.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i64 @fn2(i64 noundef %a, i64 noundef %b, i64 noundef %c) !dbg !14 { +entry: + #dbg_value(i64 %a, !19, !DIExpression(), !23) + #dbg_value(i64 %b, !20, !DIExpression(), !23) + #dbg_value(i64 %c, !21, !DIExpression(), !23) + %add = add nsw i64 %b, %a + %add1 = add nsw i64 %b, 10 + %call = tail call i64 @fn1(i64 noundef %add, i64 noundef %c, i64 noundef %add1) + #dbg_value(i64 %call, !22, !DIExpression(), !23) + %cmp = icmp sgt i64 %call, 10 + %add2 = add nuw nsw i64 %call, 10 + %retval.0 = select i1 %cmp, i64 %add2, i64 %b + ret i64 %retval.0, !dbg !29 +} + +declare !dbg !30 i64 @fn1(i64 noundef, i64 noundef, i64 noundef) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "m.c", directory: ".") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{!"clang"} +!14 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 2, type: !15, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!15 = !DISubroutineType(types: !16) +!16 = !{!17, !17, !17, !17} +!17 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!18 = !{!19, !20, !21, !22} +!19 = 
!DILocalVariable(name: "a", arg: 1, scope: !14, file: !1, line: 2, type: !17) +!20 = !DILocalVariable(name: "b", arg: 2, scope: !14, file: !1, line: 2, type: !17) +!21 = !DILocalVariable(name: "c", arg: 3, scope: !14, file: !1, line: 2, type: !17) +!22 = !DILocalVariable(name: "local", scope: !14, file: !1, line: 3, type: !17) +!23 = !DILocation(line: 0, scope: !14) +!29 = !DILocation(line: 7, column: 1, scope: !14) +!30 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !15, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) + From 4884d6cbce99286e349e239c1c1103b9f3820b56 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 12 Sep 2025 10:57:34 +0200 Subject: [PATCH 093/734] [libc++] Extend __default_three_way_comparator to any types that only implements operator<=> (#157602) This uses the new `__builtin_lt_synthesises_from_spaceship` builtin from clang to use three way comparison for arbitrary user-defined types that only provide a spaceship operator. --- libcxx/include/__type_traits/desugars_to.h | 4 ++ .../__utility/default_three_way_comparator.h | 32 +++++++++++++--- .../lazy_synth_three_way_comparator.h | 12 +++--- libcxx/include/string | 2 + .../has_default_three_way.compile.pass.cpp | 38 +++++++++++++++++++ 5 files changed, 77 insertions(+), 11 deletions(-) create mode 100644 libcxx/test/libcxx/utilities/utility/has_default_three_way.compile.pass.cpp diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h index b67baae31b181..029b3c6336837 100644 --- a/libcxx/include/__type_traits/desugars_to.h +++ b/libcxx/include/__type_traits/desugars_to.h @@ -10,6 +10,7 @@ #define _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H #include <__config> +#include <__type_traits/integral_constant.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -64,6 +65,9 @@ template inline const bool __desugars_to_v<_CanonicalTag, _Operation&&, _Args...> = __desugars_to_v<_CanonicalTag, _Operation, 
_Args...>; +template +struct __desugars_to : integral_constant > {}; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H diff --git a/libcxx/include/__utility/default_three_way_comparator.h b/libcxx/include/__utility/default_three_way_comparator.h index ce423c6ce98e4..438ab55b43230 100644 --- a/libcxx/include/__utility/default_three_way_comparator.h +++ b/libcxx/include/__utility/default_three_way_comparator.h @@ -27,9 +27,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD template struct __default_three_way_comparator; -template -struct __default_three_way_comparator<_Tp, _Tp, __enable_if_t::value> > { - _LIBCPP_HIDE_FROM_ABI static int operator()(_Tp __lhs, _Tp __rhs) { +template +struct __default_three_way_comparator<_LHS, + _RHS, + __enable_if_t::value && is_arithmetic<_RHS>::value> > { + _LIBCPP_HIDE_FROM_ABI static int operator()(_LHS __lhs, _RHS __rhs) { if (__lhs < __rhs) return -1; if (__lhs > __rhs) @@ -38,12 +40,30 @@ struct __default_three_way_comparator<_Tp, _Tp, __enable_if_t } }; +#if _LIBCPP_STD_VER >= 20 && __has_builtin(__builtin_lt_synthesises_from_spaceship) +template +struct __default_three_way_comparator< + _LHS, + _RHS, + __enable_if_t::value && is_arithmetic<_RHS>::value) && + __builtin_lt_synthesises_from_spaceship(const _LHS&, const _RHS&)>> { + _LIBCPP_HIDE_FROM_ABI static int operator()(const _LHS& __lhs, const _RHS& __rhs) { + auto __res = __lhs <=> __rhs; + if (__res < 0) + return -1; + if (__res > 0) + return 1; + return 0; + } +}; +#endif + template -inline const bool __has_default_three_way_comparator_v = false; +struct __has_default_three_way_comparator : false_type {}; template -inline const bool - __has_default_three_way_comparator_v< _LHS, _RHS, sizeof(__default_three_way_comparator<_LHS, _RHS>) >= 0> = true; +struct __has_default_three_way_comparator<_LHS, _RHS, sizeof(__default_three_way_comparator<_LHS, _RHS>) >= 0> + : true_type {}; _LIBCPP_END_NAMESPACE_STD diff --git 
a/libcxx/include/__utility/lazy_synth_three_way_comparator.h b/libcxx/include/__utility/lazy_synth_three_way_comparator.h index ca98845f04191..9105d05e1ed6a 100644 --- a/libcxx/include/__utility/lazy_synth_three_way_comparator.h +++ b/libcxx/include/__utility/lazy_synth_three_way_comparator.h @@ -10,6 +10,7 @@ #define _LIBCPP___UTILITY_LAZY_SYNTH_THREE_WAY_COMPARATOR_H #include <__config> +#include <__type_traits/conjunction.h> #include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> #include <__utility/default_three_way_comparator.h> @@ -69,11 +70,12 @@ struct __eager_compare_result { }; template -struct __lazy_synth_three_way_comparator<_Comparator, - _LHS, - _RHS, - __enable_if_t<__desugars_to_v<__less_tag, _Comparator, _LHS, _RHS> && - __has_default_three_way_comparator_v<_LHS, _RHS> > > { +struct __lazy_synth_three_way_comparator< + _Comparator, + _LHS, + _RHS, + __enable_if_t<_And<__desugars_to<__less_tag, _Comparator, _LHS, _RHS>, + __has_default_three_way_comparator<_LHS, _RHS> >::value> > { // This lifetimebound annotation is technically incorrect, but other specializations actually capture the lifetime of // the comparator. 
_LIBCPP_HIDE_FROM_ABI __lazy_synth_three_way_comparator(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator&) {} diff --git a/libcxx/include/string b/libcxx/include/string index 0abdfebcb863f..f13a7640760f7 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -2521,6 +2521,7 @@ _LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_DECLARE, wchar_t) # endif # undef _LIBCPP_DECLARE +# if _LIBCPP_STD_VER <= 17 || !__has_builtin(__builtin_lt_synthesises_from_spaceship) template struct __default_three_way_comparator, basic_string<_CharT, _Traits, _Alloc> > { using __string_t _LIBCPP_NODEBUG = basic_string<_CharT, _Traits, _Alloc>; @@ -2533,6 +2534,7 @@ struct __default_three_way_comparator, bas return __ret; } }; +# endif # if _LIBCPP_STD_VER >= 17 template +#include +#include + +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); + +#if __has_builtin(__builtin_lt_synthesises_from_spaceship) +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); +static_assert(std::__has_default_three_way_comparator::value); +static_assert(!std::__has_default_three_way_comparator::value); + +static_assert(std::__has_default_three_way_comparator&, const std::vector&>::value); + +struct MyStruct { + int i; + + friend auto operator<=>(MyStruct, MyStruct) = default; +}; + +static_assert(std::__has_default_three_way_comparator::value); +#endif From baec6c55c1ddb226aa50f1663d9a4d61f44e1665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Fri, 12 Sep 2025 10:59:41 +0200 Subject: [PATCH 094/734] 
[clang][ASTImporter] Fixed test 'ctu-import-type-decl-definition' and ASTImporter (#158016) The test was faulty and did not reproduce the error in ASTImporter. The previous fix #156056 for the crash was not correct, this is fixed here. --- clang/lib/AST/ASTImporter.cpp | 16 +++++++--------- .../Analysis/ctu-import-type-decl-definition.c | 16 +++++++++------- clang/unittests/AST/ASTImporterTest.cpp | 3 ++- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index fe7f1e5eb0310..db14272ae5db8 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1745,15 +1745,13 @@ ExpectedType ASTNodeImporter::VisitTagType(const TagType *T) { if (!ToDeclOrErr) return ToDeclOrErr.takeError(); - if (DeclForType->isUsed()) { - // If there is a definition of the 'OriginalDecl', it should be imported to - // have all information for the type in the "To" AST. (In some cases no - // other reference may exist to the definition decl and it would not be - // imported otherwise.) - Expected ToDefDeclOrErr = import(DeclForType->getDefinition()); - if (!ToDefDeclOrErr) - return ToDefDeclOrErr.takeError(); - } + // If there is a definition of the 'OriginalDecl', it should be imported to + // have all information for the type in the "To" AST. (In some cases no + // other reference may exist to the definition decl and it would not be + // imported otherwise.) 
+ Expected ToDefDeclOrErr = import(DeclForType->getDefinition()); + if (!ToDefDeclOrErr) + return ToDefDeclOrErr.takeError(); if (T->isCanonicalUnqualified()) return Importer.getToContext().getCanonicalTagType(*ToDeclOrErr); diff --git a/clang/test/Analysis/ctu-import-type-decl-definition.c b/clang/test/Analysis/ctu-import-type-decl-definition.c index f74920697e977..10910e0812f3a 100644 --- a/clang/test/Analysis/ctu-import-type-decl-definition.c +++ b/clang/test/Analysis/ctu-import-type-decl-definition.c @@ -2,26 +2,28 @@ // RUN: mkdir -p %t // RUN: split-file %s %t -// RUN: %clang_cc1 -emit-pch -o %t/import.c.ast %t/import.c +// RUN: %clang_cc1 -x c -emit-pch -o %t/import.c.ast %t/import.c -// RUN: %clang_extdef_map -- -x c %t/import.c >> %t/externalDefMap.tmp.txt -// RUN: sed 's/$/.ast/' %t/externalDefMap.tmp.txt >> %t/externalDefMap.txt +// RUN: %clang_extdef_map %t/import.c -- -c -x c > %t/externalDefMap.tmp.txt +// RUN: sed 's/$/.ast/' %t/externalDefMap.tmp.txt > %t/externalDefMap.txt // RUN: %clang_cc1 -analyze \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-config experimental-enable-naive-ctu-analysis=true \ // RUN: -analyzer-config display-ctu-progress=true \ // RUN: -analyzer-config ctu-dir=%t \ -// RUN: -verify %t/main.c +// RUN: -verify %t/main.c 2>&1 | FileCheck %s //--- main.c // expected-no-diagnostics +// CHECK: CTU loaded AST file: typedef struct X_s X_t; -unsigned long f_import(struct X_s *xPtr); -static void freeWriteFileResources(struct X_s *xPtr) { +long f_import(struct X_s *xPtr); + +static void f_main(struct X_s *xPtr) { f_import(xPtr); } @@ -36,7 +38,7 @@ struct X_s { Y_t y; }; -unsigned long f_import(struct X_s *xPtr) { +long f_import(struct X_s *xPtr) { if (xPtr != 0) { } return 0; diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 5badbd7d65e48..ac40a871c0252 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -10025,7 +10025,8 @@ 
struct ImportTemplateParmDeclDefaultValue EXPECT_EQ(ToD->getPreviousDecl(), ToDInherited); } else { EXPECT_EQ(FromD, FromDInherited->getPreviousDecl()); - EXPECT_EQ(ToD, ToDInherited->getPreviousDecl()); + // The order is reversed by the import process. + EXPECT_EQ(ToD->getPreviousDecl(), ToDInherited); } } From 9e1d656c682cd0bf6f123fba2064ffdb8861f790 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 18:02:24 +0900 Subject: [PATCH 095/734] AMDGPU: Remove MIMG special case in adjustAllocatableRegClass (#158184) I have no idea why this was here. MIMG atomics use tied operands for the input and output, so AV classes should have always worked. We have poor test coverage for AGPRs with atomics, so add a partial set. Everything seems to work OK, although it seems image cmpswap always uses VGPRs unnecessarily. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 +- .../llvm.amdgcn.image.atomic.dim.gfx90a.ll | 170 ++++++++++++++++++ .../AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll | 108 +++++++++++ 3 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 23a124fecddad..5c3340703ba3b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5977,8 +5977,7 @@ static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID) { if (!ST.hasGFX90AInsts() && (((TID.mayLoad() || TID.mayStore()) && - !(TID.TSFlags & SIInstrFlags::Spill)) || - (TID.TSFlags & SIInstrFlags::MIMG))) { + !(TID.TSFlags & SIInstrFlags::Spill)))) { switch (RCID) { case AMDGPU::AV_32RegClassID: RCID = AMDGPU::VGPR_32RegClassID; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll new file mode 100644 index 
0000000000000..49607e320bd0a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s + +define amdgpu_ps void @atomic_swap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_swap_1d_agpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a0 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_endpgm + %data = call i32 asm "; def $0", "=a"() + %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void asm "; use $0", "a"(i32 %v) + ret void +} + +define amdgpu_ps void @atomic_add_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { +; GFX90A-LABEL: atomic_add_2d_agpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a0 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_endpgm + %data = call i32 asm "; def $0", "=a"() + %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + call void asm "; use $0", "a"(i32 %v) + ret void +} + +; FIXME: This should directly use the AGPRs +define amdgpu_ps void @atomic_cmpswap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_cmpswap_1d_agpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a0 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a1 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 +; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 +; GFX90A-NEXT: image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_endpgm + %cmp = call i32 asm "; def $0", "=a"() + %swap = call i32 asm "; def $0", "=a"() + %v = call i32 
@llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void asm "; use $0", "a"(i32 %v) + ret void +} + +define amdgpu_ps void @atomic_swap_1d_i64_agpr(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_swap_1d_i64_agpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a[0:1] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_endpgm + %data = call i64 asm "; def $0", "=a"() + %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void asm "; use $0", "a"(i64 %v) + ret void +} + +define amdgpu_ps void @atomic_cmpswap_1d_64_agpr(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a[0:1] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 +; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a[0:1] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v5, a1 +; GFX90A-NEXT: v_accvgpr_read_b32 v4, a0 +; GFX90A-NEXT: image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: s_endpgm + %cmp = call i64 asm "; def $0", "=a"() + %swap = call i64 asm "; def $0", "=a"() + %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void asm "; use $0", "a"(i64 %v) + ret void +} + +define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_swap_1d_agpr_noret: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a0 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v1, a0 +; GFX90A-NEXT: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_endpgm + %data = call i32 asm "; def $0", "=a"() + %unused = call i32 
@llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { +; GFX90A-LABEL: atomic_add_2d_agpr_noret: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a0 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 +; GFX90A-NEXT: image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_endpgm + %data = call i32 asm "; def $0", "=a"() + %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_cmpswap_1d_agpr_noret: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a0 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a1 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 +; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 +; GFX90A-NEXT: image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_endpgm + %cmp = call i32 asm "; def $0", "=a"() + %swap = call i32 asm "; def $0", "=a"() + %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_swap_1d_i64_agpr_noret: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a[0:1] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 +; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 +; GFX90A-NEXT: image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_endpgm + %data = call i64 asm "; def $0", "=a"() + %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void 
@atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) { +; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr_noret: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a[0:1] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 +; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def a[0:1] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_accvgpr_read_b32 v5, a1 +; GFX90A-NEXT: v_accvgpr_read_b32 v4, a0 +; GFX90A-NEXT: image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: s_endpgm + %cmp = call i64 asm "; def $0", "=a"() + %swap = call i64 asm "; def $0", "=a"() + %unused = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll index dcac419f8591d..bb4a607fc62d0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll @@ -418,6 +418,114 @@ main_body: ret <4 x float> %v } +define amdgpu_ps void @load_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) { +; GCN-LABEL: load_1d_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: image_load a[0:3], v0, s[0:7] dmask:0xf unorm +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_endpgm + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void asm sideeffect "; use $0", "a"(<4 x float> %v) + ret void +} + +define amdgpu_ps void @load_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { +; GCN-LABEL: load_2d_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: image_load a[0:3], v[0:1], s[0:7] dmask:0xf unorm +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_endpgm + %v = call <4 x float> 
@llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + call void asm sideeffect "; use $0", "a"(<4 x float> %v) + ret void +} + +define amdgpu_ps void @load_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { +; GCN-LABEL: load_3d_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_endpgm + %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) + call void asm sideeffect "; use $0", "a"(<4 x float> %v) + ret void +} + +define amdgpu_ps void @load_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) { +; GCN-LABEL: load_cube_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_endpgm + %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) + call void asm sideeffect "; use $0", "a"(<4 x float> %v) + ret void +} + +define amdgpu_ps void @store_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) { +; GCN-LABEL: store_1d_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: image_store a[0:3], v0, s[0:7] dmask:0xf unorm +; GCN-NEXT: s_endpgm + %vdata = call <4 x float> asm "; def $0", "=a"() + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { +; GCN-LABEL: store_2d_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: image_store a[0:3], v[0:1], s[0:7] dmask:0xf unorm +; GCN-NEXT: s_endpgm + %vdata = call <4 x float> asm 
"; def $0", "=a"() + call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { +; GCN-LABEL: store_3d_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm +; GCN-NEXT: s_endpgm + %vdata = call <4 x float> asm "; def $0", "=a"() + call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) { +; GCN-LABEL: store_cube_agpr: +; GCN: ; %bb.0: +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def a[0:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; GCN-NEXT: s_endpgm + %vdata = call <4 x float> asm "; def $0", "=a"() + call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1 From 1bafd020c7c80be476f211bc239ce43424f7e0ce Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 12 Sep 2025 11:05:14 +0200 Subject: [PATCH 096/734] [libc++] Mark __{emplace,push}_back_slow_path as noinline (#94379) These are almost certainly intended to not be inlined. This significantly reduces code size when `push_back` and `emplace_back` are used heavily. 
Fixes #94360 --- libcxx/include/__vector/vector.h | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h index 4307e78f6ddbc..5a3c13189d52f 100644 --- a/libcxx/include/__vector/vector.h +++ b/libcxx/include/__vector/vector.h @@ -1142,6 +1142,24 @@ vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) { return this->__end_; } +// This makes the compiler inline `__else()` if `__cond` is known to be false. Currently LLVM doesn't do that without +// the `__builtin_constant_p`, since it considers `__else` unlikely even through it's known to be run. +// See https://llvm.org/PR154292 +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __if_likely_else(bool __cond, _If __if, _Else __else) { + if (__builtin_constant_p(__cond)) { + if (__cond) + __if(); + else + __else(); + } else { + if (__cond) [[__likely__]] + __if(); + else + __else(); + } +} + template template _LIBCPP_CONSTEXPR_SINCE_CXX20 inline @@ -1152,12 +1170,14 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 inline #endif vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) { pointer __end = this->__end_; - if (__end < this->__cap_) { - __emplace_back_assume_capacity(std::forward<_Args>(__args)...); - ++__end; - } else { - __end = __emplace_back_slow_path(std::forward<_Args>(__args)...); - } + std::__if_likely_else( + __end < this->__cap_, + [&] { + __emplace_back_assume_capacity(std::forward<_Args>(__args)...); + ++__end; + }, + [&] { __end = __emplace_back_slow_path(std::forward<_Args>(__args)...); }); + this->__end_ = __end; #if _LIBCPP_STD_VER >= 17 return *(__end - 1); From 381e1bb461564b829f7fa558801c317a619b32de Mon Sep 17 00:00:00 2001 From: Haibo Jiang Date: Fri, 12 Sep 2025 17:10:28 +0800 Subject: [PATCH 097/734] [BOLT] fix print-mem-data not working (#156332) This option `print-mem-data` is currently not working, use this fix to restore its functionality. 
--- bolt/include/bolt/Core/MCPlusBuilder.h | 3 +- bolt/lib/Core/BinaryContext.cpp | 2 +- bolt/lib/Core/MCPlusBuilder.cpp | 10 +++++-- bolt/test/AArch64/print-mem-data.test | 40 ++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 bolt/test/AArch64/print-mem-data.test diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 1c630ab8efc10..90129d475d870 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -2216,7 +2216,8 @@ class MCPlusBuilder { } /// Print each annotation attached to \p Inst. - void printAnnotations(const MCInst &Inst, raw_ostream &OS) const; + void printAnnotations(const MCInst &Inst, raw_ostream &OS, + bool PrintMemData = false) const; /// Remove annotation with a given \p Index. /// diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 6d16edfff73d1..8e2224b51fa8a 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -2044,7 +2044,7 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, if (MCSymbol *Label = MIB->getInstLabel(Instruction)) OS << " # Label: " << *Label; - MIB->printAnnotations(Instruction, OS); + MIB->printAnnotations(Instruction, OS, PrintMemData || opts::PrintMemData); if (opts::PrintDebugInfo) printDebugInfo(OS, Instruction, Function, DwCtx.get()); diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 7f962e14ea115..52475227eb32f 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -378,8 +378,8 @@ void MCPlusBuilder::stripAnnotations(MCInst &Inst, bool KeepTC) const { setTailCall(Inst); } -void MCPlusBuilder::printAnnotations(const MCInst &Inst, - raw_ostream &OS) const { +void MCPlusBuilder::printAnnotations(const MCInst &Inst, raw_ostream &OS, + bool PrintMemData) const { std::optional FirstAnnotationOp = getFirstAnnotationOpIndex(Inst); if 
(!FirstAnnotationOp) return; @@ -390,7 +390,11 @@ void MCPlusBuilder::printAnnotations(const MCInst &Inst, const int64_t Value = extractAnnotationValue(Imm); const auto *Annotation = reinterpret_cast(Value); if (Index >= MCAnnotation::kGeneric) { - OS << " # " << AnnotationNames[Index - MCAnnotation::kGeneric] << ": "; + std::string AnnotationName = + AnnotationNames[Index - MCAnnotation::kGeneric]; + if (!PrintMemData && AnnotationName == "MemoryAccessProfile") + continue; + OS << " # " << AnnotationName << ": "; Annotation->print(OS); } } diff --git a/bolt/test/AArch64/print-mem-data.test b/bolt/test/AArch64/print-mem-data.test new file mode 100644 index 0000000000000..09d4f4640a454 --- /dev/null +++ b/bolt/test/AArch64/print-mem-data.test @@ -0,0 +1,40 @@ +# Check that --print-mem-data option works properly in llvm-bolt + +# RUN: split-file %s %t +# RUN: %clang %cflags -fPIC -pie %t/main.s -o %t.exe -nostdlib -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --print-mem-data=true --print-cfg \ +# RUN: --data %t/fdata | FileCheck %s -check-prefix=CHECK-PRINT +# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg \ +# RUN: --data %t/fdata | FileCheck %s -check-prefix=CHECK-DEFAULT + +# CHECK-PRINT: ldr w2, [x1], #0x4 # MemoryAccessProfile: 7 total counts : +# CHECK-PRINT-NEXT: { 0x123: 1 }, +# CHECK-PRINT-NEXT: { 0x456: 2 }, +# CHECK-PRINT-NEXT: { 0xabc: 4 } +# CHECK-DEFAULT-NOT: MemoryAccessProfile + +#--- main.s + .text + .align 4 + .global main + .type main, %function +main: + sub sp, sp, #48 + add x1, sp, 8 + add x3, sp, 48 + mov w0, 0 +.L2: + ldr w2, [x1], 4 + add w0, w0, w2 + cmp x1, x3 + bne .L2 + add sp, sp, 48 + ret + .size main, .-main + +# The three memory access data generated by the load at +# offset 0x10 in the main. 
+#--- fdata +4 main 10 4 otherSym 123 1 +4 main 10 4 otherSym 456 2 +4 main 10 4 otherSym abc 4 From 47b490b4dcda4ac5d4adae88a7c90ffac8068312 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 12 Sep 2025 10:16:53 +0100 Subject: [PATCH 098/734] [AArch64][SME] Refactor MachineSMEABI pass state (NFCI) (#156674) This removes the pass state (aside from target classes) from the MachineSMEABI class, and instead passes/returns state between functions. The intention is to make dataflow (and where state is mutated) more apparent. --- .../Target/AArch64/AArch64ISelLowering.cpp | 1 + llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 294 ++++++++++-------- 2 files changed, 167 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a194147d09396..f970f71ecb89c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9312,6 +9312,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, std::optional ZAMarkerNode; bool UseNewSMEABILowering = getTM().useNewSMEABILowering(); + if (UseNewSMEABILowering) { if (CallAttrs.requiresLazySave() || CallAttrs.requiresPreservingAllZAState()) diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index c39a5cc2fcb16..cced0faa28889 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -110,6 +110,71 @@ struct PhysRegSave { Register X0Save = AArch64::NoRegister; }; +/// Contains the needed ZA state (and live registers) at an instruction. That is +/// the state ZA must be in _before_ "InsertPt". +struct InstInfo { + ZAState NeededState{ZAState::ANY}; + MachineBasicBlock::iterator InsertPt; + LiveRegs PhysLiveRegs = LiveRegs::None; +}; + +/// Contains the needed ZA state for each instruction in a block. Instructions +/// that do not require a ZA state are not recorded. 
+struct BlockInfo { + ZAState FixedEntryState{ZAState::ANY}; + SmallVector Insts; + LiveRegs PhysLiveRegsAtEntry = LiveRegs::None; + LiveRegs PhysLiveRegsAtExit = LiveRegs::None; +}; + +/// Contains the needed ZA state information for all blocks within a function. +struct FunctionInfo { + SmallVector Blocks; + std::optional AfterSMEProloguePt; + LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None; +}; + +/// State/helpers that is only needed when emitting code to handle +/// saving/restoring ZA. +class EmitContext { +public: + EmitContext() = default; + + /// Get or create a TPIDR2 block in \p MF. + int getTPIDR2Block(MachineFunction &MF) { + if (TPIDR2BlockFI) + return *TPIDR2BlockFI; + MachineFrameInfo &MFI = MF.getFrameInfo(); + TPIDR2BlockFI = MFI.CreateStackObject(16, Align(16), false); + return *TPIDR2BlockFI; + } + + /// Get or create agnostic ZA buffer pointer in \p MF. + Register getAgnosticZABufferPtr(MachineFunction &MF) { + if (AgnosticZABufferPtr != AArch64::NoRegister) + return AgnosticZABufferPtr; + Register BufferPtr = + MF.getInfo()->getEarlyAllocSMESaveBuffer(); + AgnosticZABufferPtr = + BufferPtr != AArch64::NoRegister + ? BufferPtr + : MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + return AgnosticZABufferPtr; + } + + /// Returns true if the function must allocate a ZA save buffer on entry. This + /// will be the case if, at any point in the function, a ZA save was emitted. 
+ bool needsSaveBuffer() const { + assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) && + "Cannot have both a TPIDR2 block and agnostic ZA buffer"); + return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister; + } + +private: + std::optional TPIDR2BlockFI; + Register AgnosticZABufferPtr = AArch64::NoRegister; +}; + static bool isLegalEdgeBundleZAState(ZAState State) { switch (State) { case ZAState::ACTIVE: @@ -119,9 +184,6 @@ static bool isLegalEdgeBundleZAState(ZAState State) { return false; } } -struct TPIDR2State { - int FrameIndex = -1; -}; StringRef getZAStateString(ZAState State) { #define MAKE_CASE(V) \ @@ -192,25 +254,28 @@ struct MachineSMEABI : public MachineFunctionPass { /// Collects the needed ZA state (and live registers) before each instruction /// within the machine function. - void collectNeededZAStates(SMEAttrs); + FunctionInfo collectNeededZAStates(SMEAttrs SMEFnAttrs); /// Assigns each edge bundle a ZA state based on the needed states of blocks /// that have incoming or outgoing edges in that bundle. - void assignBundleZAStates(); + SmallVector assignBundleZAStates(const EdgeBundles &Bundles, + const FunctionInfo &FnInfo); /// Inserts code to handle changes between ZA states within the function. /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA. - void insertStateChanges(); + void insertStateChanges(EmitContext &, const FunctionInfo &FnInfo, + const EdgeBundles &Bundles, + ArrayRef BundleStates); // Emission routines for private and shared ZA functions (using lazy saves). 
void emitNewZAPrologue(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); - void emitRestoreLazySave(MachineBasicBlock &MBB, + void emitRestoreLazySave(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs); - void emitSetupLazySave(MachineBasicBlock &MBB, + void emitSetupLazySave(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); - void emitAllocateLazySaveBuffer(MachineBasicBlock &MBB, + void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool ClearTPIDR2); @@ -222,78 +287,49 @@ struct MachineSMEABI : public MachineFunctionPass { // Emit a "full" ZA save or restore. It is "full" in the sense that this // function will emit a call to __arm_sme_save or __arm_sme_restore, which // handles saving and restoring both ZA and ZT0. - void emitFullZASaveRestore(MachineBasicBlock &MBB, + void emitFullZASaveRestore(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs, bool IsSave); - void emitAllocateFullZASaveBuffer(MachineBasicBlock &MBB, + void emitAllocateFullZASaveBuffer(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs); - void emitStateChange(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - ZAState From, ZAState To, LiveRegs PhysLiveRegs); + void emitStateChange(EmitContext &, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, ZAState From, + ZAState To, LiveRegs PhysLiveRegs); // Helpers for switching between lazy/full ZA save/restore routines. 
- void emitZASave(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - LiveRegs PhysLiveRegs) { + void emitZASave(EmitContext &Context, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) { if (AFI->getSMEFnAttrs().hasAgnosticZAInterface()) - return emitFullZASaveRestore(MBB, MBBI, PhysLiveRegs, /*IsSave=*/true); - return emitSetupLazySave(MBB, MBBI); + return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs, + /*IsSave=*/true); + return emitSetupLazySave(Context, MBB, MBBI); } - void emitZARestore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - LiveRegs PhysLiveRegs) { + void emitZARestore(EmitContext &Context, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) { if (AFI->getSMEFnAttrs().hasAgnosticZAInterface()) - return emitFullZASaveRestore(MBB, MBBI, PhysLiveRegs, /*IsSave=*/false); - return emitRestoreLazySave(MBB, MBBI, PhysLiveRegs); + return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs, + /*IsSave=*/false); + return emitRestoreLazySave(Context, MBB, MBBI, PhysLiveRegs); } - void emitAllocateZASaveBuffer(MachineBasicBlock &MBB, + void emitAllocateZASaveBuffer(EmitContext &Context, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) { if (AFI->getSMEFnAttrs().hasAgnosticZAInterface()) - return emitAllocateFullZASaveBuffer(MBB, MBBI, PhysLiveRegs); - return emitAllocateLazySaveBuffer(MBB, MBBI); + return emitAllocateFullZASaveBuffer(Context, MBB, MBBI, PhysLiveRegs); + return emitAllocateLazySaveBuffer(Context, MBB, MBBI); } /// Save live physical registers to virtual registers. PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL); /// Restore physical registers from a save of their previous values. 
- void restorePhyRegSave(PhysRegSave const &RegSave, MachineBasicBlock &MBB, + void restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL); - /// Get or create a TPIDR2 block in this function. - TPIDR2State getTPIDR2Block(); - - Register getAgnosticZABufferPtr(); - private: - /// Contains the needed ZA state (and live registers) at an instruction. - struct InstInfo { - ZAState NeededState{ZAState::ANY}; - MachineBasicBlock::iterator InsertPt; - LiveRegs PhysLiveRegs = LiveRegs::None; - }; - - /// Contains the needed ZA state for each instruction in a block. - /// Instructions that do not require a ZA state are not recorded. - struct BlockInfo { - ZAState FixedEntryState{ZAState::ANY}; - SmallVector Insts; - LiveRegs PhysLiveRegsAtEntry = LiveRegs::None; - LiveRegs PhysLiveRegsAtExit = LiveRegs::None; - }; - - // All pass state that must be cleared between functions. - struct PassState { - SmallVector Blocks; - SmallVector BundleStates; - std::optional TPIDR2Block; - std::optional AfterSMEProloguePt; - Register AgnosticZABufferPtr = AArch64::NoRegister; - LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None; - } State; - MachineFunction *MF = nullptr; - EdgeBundles *Bundles = nullptr; const AArch64Subtarget *Subtarget = nullptr; const AArch64RegisterInfo *TRI = nullptr; const AArch64FunctionInfo *AFI = nullptr; @@ -301,14 +337,18 @@ struct MachineSMEABI : public MachineFunctionPass { MachineRegisterInfo *MRI = nullptr; }; -void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { +FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) && "Expected function to have ZA/ZT0 state!"); - State.Blocks.resize(MF->getNumBlockIDs()); + SmallVector Blocks(MF->getNumBlockIDs()); + LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None; + std::optional AfterSMEProloguePt; + for 
(MachineBasicBlock &MBB : *MF) { - BlockInfo &Block = State.Blocks[MBB.getNumber()]; + BlockInfo &Block = Blocks[MBB.getNumber()]; + if (MBB.isEntryBlock()) { // Entry block: Block.FixedEntryState = SMEFnAttrs.hasPrivateZAInterface() @@ -347,8 +387,8 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { // allocation -- which is a safe point for this pass to insert any TPIDR2 // block setup. if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) { - State.AfterSMEProloguePt = MBBI; - State.PhysLiveRegsAfterSMEPrologue = PhysLiveRegs; + AfterSMEProloguePt = MBBI; + PhysLiveRegsAfterSMEPrologue = PhysLiveRegs; } // Note: We treat Agnostic ZA as inout_za with an alternate save/restore. auto [NeededState, InsertPt] = getZAStateBeforeInst( @@ -368,11 +408,18 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { // Reverse vector (as we had to iterate backwards for liveness). std::reverse(Block.Insts.begin(), Block.Insts.end()); } + + return FunctionInfo{std::move(Blocks), AfterSMEProloguePt, + PhysLiveRegsAfterSMEPrologue}; } -void MachineSMEABI::assignBundleZAStates() { - State.BundleStates.resize(Bundles->getNumBundles()); - for (unsigned I = 0, E = Bundles->getNumBundles(); I != E; ++I) { +/// Assigns each edge bundle a ZA state based on the needed states of blocks +/// that have incoming or outgoing edges in that bundle. +SmallVector +MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, + const FunctionInfo &FnInfo) { + SmallVector BundleStates(Bundles.getNumBundles()); + for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) { LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n'); // Attempt to assign a ZA state for this bundle that minimizes state @@ -381,16 +428,16 @@ void MachineSMEABI::assignBundleZAStates() { // TODO: We should propagate desired incoming/outgoing states through blocks // that have the "ANY" state first to make better global decisions. 
int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0}; - for (unsigned BlockID : Bundles->getBlocks(I)) { + for (unsigned BlockID : Bundles.getBlocks(I)) { LLVM_DEBUG(dbgs() << "- bb." << BlockID); - const BlockInfo &Block = State.Blocks[BlockID]; + const BlockInfo &Block = FnInfo.Blocks[BlockID]; if (Block.Insts.empty()) { LLVM_DEBUG(dbgs() << " (no state preference)\n"); continue; } - bool InEdge = Bundles->getBundle(BlockID, /*Out=*/false) == I; - bool OutEdge = Bundles->getBundle(BlockID, /*Out=*/true) == I; + bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I; + bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I; ZAState DesiredIncomingState = Block.Insts.front().NeededState; if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) { @@ -423,15 +470,20 @@ void MachineSMEABI::assignBundleZAStates() { dbgs() << "\n\n"; }); - State.BundleStates[I] = BundleState; + BundleStates[I] = BundleState; } + + return BundleStates; } -void MachineSMEABI::insertStateChanges() { +void MachineSMEABI::insertStateChanges(EmitContext &Context, + const FunctionInfo &FnInfo, + const EdgeBundles &Bundles, + ArrayRef BundleStates) { for (MachineBasicBlock &MBB : *MF) { - const BlockInfo &Block = State.Blocks[MBB.getNumber()]; - ZAState InState = State.BundleStates[Bundles->getBundle(MBB.getNumber(), - /*Out=*/false)]; + const BlockInfo &Block = FnInfo.Blocks[MBB.getNumber()]; + ZAState InState = BundleStates[Bundles.getBundle(MBB.getNumber(), + /*Out=*/false)]; ZAState CurrentState = Block.FixedEntryState; if (CurrentState == ZAState::ANY) @@ -439,8 +491,8 @@ void MachineSMEABI::insertStateChanges() { for (auto &Inst : Block.Insts) { if (CurrentState != Inst.NeededState) - emitStateChange(MBB, Inst.InsertPt, CurrentState, Inst.NeededState, - Inst.PhysLiveRegs); + emitStateChange(Context, MBB, Inst.InsertPt, CurrentState, + Inst.NeededState, Inst.PhysLiveRegs); CurrentState = Inst.NeededState; } @@ -448,21 +500,13 @@ void MachineSMEABI::insertStateChanges() { 
continue; ZAState OutState = - State.BundleStates[Bundles->getBundle(MBB.getNumber(), /*Out=*/true)]; + BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)]; if (CurrentState != OutState) - emitStateChange(MBB, MBB.getFirstTerminator(), CurrentState, OutState, - Block.PhysLiveRegsAtExit); + emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState, + OutState, Block.PhysLiveRegsAtExit); } } -TPIDR2State MachineSMEABI::getTPIDR2Block() { - if (State.TPIDR2Block) - return *State.TPIDR2Block; - MachineFrameInfo &MFI = MF->getFrameInfo(); - State.TPIDR2Block = TPIDR2State{MFI.CreateStackObject(16, Align(16), false)}; - return *State.TPIDR2Block; -} - static DebugLoc getDebugLoc(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { if (MBBI != MBB.end()) @@ -470,7 +514,8 @@ static DebugLoc getDebugLoc(MachineBasicBlock &MBB, return DebugLoc(); } -void MachineSMEABI::emitSetupLazySave(MachineBasicBlock &MBB, +void MachineSMEABI::emitSetupLazySave(EmitContext &Context, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { DebugLoc DL = getDebugLoc(MBB, MBBI); @@ -478,7 +523,7 @@ void MachineSMEABI::emitSetupLazySave(MachineBasicBlock &MBB, Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass); Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2) - .addFrameIndex(getTPIDR2Block().FrameIndex) + .addFrameIndex(Context.getTPIDR2Block(*MF)) .addImm(0) .addImm(0); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr) @@ -512,7 +557,7 @@ PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs, return RegSave; } -void MachineSMEABI::restorePhyRegSave(PhysRegSave const &RegSave, +void MachineSMEABI::restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL) { @@ -528,7 +573,8 @@ void MachineSMEABI::restorePhyRegSave(PhysRegSave const &RegSave, .addReg(RegSave.X0Save); } 
-void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB, +void MachineSMEABI::emitRestoreLazySave(EmitContext &Context, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) { auto *TLI = Subtarget->getTargetLowering(); @@ -548,7 +594,7 @@ void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB, .addImm(AArch64SysReg::TPIDR2_EL0); // Get pointer to TPIDR2 block. BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2) - .addFrameIndex(getTPIDR2Block().FrameIndex) + .addFrameIndex(Context.getTPIDR2Block(*MF)) .addImm(0) .addImm(0); // (Conditionally) restore ZA state. @@ -582,7 +628,8 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB, } void MachineSMEABI::emitAllocateLazySaveBuffer( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { + EmitContext &Context, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { MachineFrameInfo &MFI = MF->getFrameInfo(); DebugLoc DL = getDebugLoc(MBB, MBBI); Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass); @@ -630,7 +677,7 @@ void MachineSMEABI::emitAllocateLazySaveBuffer( BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi)) .addReg(Buffer) .addReg(SVL) - .addFrameIndex(getTPIDR2Block().FrameIndex) + .addFrameIndex(Context.getTPIDR2Block(*MF)) .addImm(0); } } @@ -662,18 +709,8 @@ void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB, .addImm(1); } -Register MachineSMEABI::getAgnosticZABufferPtr() { - if (State.AgnosticZABufferPtr != AArch64::NoRegister) - return State.AgnosticZABufferPtr; - Register BufferPtr = AFI->getEarlyAllocSMESaveBuffer(); - State.AgnosticZABufferPtr = - BufferPtr != AArch64::NoRegister - ? 
BufferPtr - : MF->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); - return State.AgnosticZABufferPtr; -} - -void MachineSMEABI::emitFullZASaveRestore(MachineBasicBlock &MBB, +void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs, bool IsSave) { auto *TLI = Subtarget->getTargetLowering(); @@ -684,7 +721,7 @@ void MachineSMEABI::emitFullZASaveRestore(MachineBasicBlock &MBB, // Copy the buffer pointer into X0. BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr) - .addReg(getAgnosticZABufferPtr()); + .addReg(Context.getAgnosticZABufferPtr(*MF)); // Call __arm_sme_save/__arm_sme_restore. BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) @@ -699,14 +736,14 @@ void MachineSMEABI::emitFullZASaveRestore(MachineBasicBlock &MBB, } void MachineSMEABI::emitAllocateFullZASaveBuffer( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - LiveRegs PhysLiveRegs) { + EmitContext &Context, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) { // Buffer already allocated in SelectionDAG. if (AFI->getEarlyAllocSMESaveBuffer()) return; DebugLoc DL = getDebugLoc(MBB, MBBI); - Register BufferPtr = getAgnosticZABufferPtr(); + Register BufferPtr = Context.getAgnosticZABufferPtr(*MF); Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass); PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL); @@ -742,11 +779,11 @@ void MachineSMEABI::emitAllocateFullZASaveBuffer( restorePhyRegSave(RegSave, MBB, MBBI, DL); } -void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB, +void MachineSMEABI::emitStateChange(EmitContext &Context, + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, ZAState From, ZAState To, LiveRegs PhysLiveRegs) { - // ZA not used. 
if (From == ZAState::ANY || To == ZAState::ANY) return; @@ -774,9 +811,9 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB, } if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED) - emitZASave(MBB, InsertPt, PhysLiveRegs); + emitZASave(Context, MBB, InsertPt, PhysLiveRegs); else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE) - emitZARestore(MBB, InsertPt, PhysLiveRegs); + emitZARestore(Context, MBB, InsertPt, PhysLiveRegs); else if (To == ZAState::OFF) { assert(From != ZAState::CALLER_DORMANT && "CALLER_DORMANT to OFF should have already been handled"); @@ -807,32 +844,33 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) { assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!"); - // Reset pass state. - State = PassState{}; this->MF = &MF; - Bundles = &getAnalysis().getEdgeBundles(); Subtarget = &MF.getSubtarget(); TII = Subtarget->getInstrInfo(); TRI = Subtarget->getRegisterInfo(); MRI = &MF.getRegInfo(); - collectNeededZAStates(SMEFnAttrs); - assignBundleZAStates(); - insertStateChanges(); + const EdgeBundles &Bundles = + getAnalysis().getEdgeBundles(); + + FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs); + SmallVector BundleStates = assignBundleZAStates(Bundles, FnInfo); + + EmitContext Context; + insertStateChanges(Context, FnInfo, Bundles, BundleStates); - // Allocate save buffer (if needed). - if (State.AgnosticZABufferPtr != AArch64::NoRegister || State.TPIDR2Block) { - if (State.AfterSMEProloguePt) { + if (Context.needsSaveBuffer()) { + if (FnInfo.AfterSMEProloguePt) { // Note: With inline stack probes the AfterSMEProloguePt may not be in the // entry block (due to the probing loop). 
- emitAllocateZASaveBuffer(*(*State.AfterSMEProloguePt)->getParent(), - *State.AfterSMEProloguePt, - State.PhysLiveRegsAfterSMEPrologue); + MachineBasicBlock::iterator MBBI = *FnInfo.AfterSMEProloguePt; + emitAllocateZASaveBuffer(Context, *MBBI->getParent(), MBBI, + FnInfo.PhysLiveRegsAfterSMEPrologue); } else { MachineBasicBlock &EntryBlock = MF.front(); emitAllocateZASaveBuffer( - EntryBlock, EntryBlock.getFirstNonPHI(), - State.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry); + Context, EntryBlock, EntryBlock.getFirstNonPHI(), + FnInfo.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry); } } From 2331fbb01978463a218d80883d29a003fdef6e14 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 18:18:50 +0900 Subject: [PATCH 099/734] CodeGen: Remove MachineFunction argument from getPointerRegClass (#158185) getPointerRegClass is a layering violation. Its primary purpose is to determine how to interpret an MCInstrDesc's operands RegClass fields. This should be context free, and only depend on the subtarget. The model of this is also wrong, since this should be an instruction / operand specific property, not a global pointer class. Remove the the function argument to help stage removal of this hook and avoid introducing any new obstacles to replacing it. The remaining uses of the function were to get the subtarget, which TargetRegisterInfo already belongs to. A few targets needed new subtarget derived properties copied there. 
--- llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 2 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 +- llvm/lib/CodeGen/MachineInstr.cpp | 2 +- llvm/lib/CodeGen/TargetInstrInfo.cpp | 2 +- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 3 +-- llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 3 +-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 4 ++-- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 4 ++-- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 3 +-- llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 3 +-- llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 2 +- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 2 +- llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 12 ++++++------ llvm/lib/Target/ARM/ThumbRegisterInfo.h | 8 +++++--- llvm/lib/Target/AVR/AVRRegisterInfo.cpp | 3 +-- llvm/lib/Target/AVR/AVRRegisterInfo.h | 3 +-- llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp | 3 +-- llvm/lib/Target/Hexagon/HexagonRegisterInfo.h | 3 +-- llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h | 3 +-- llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp | 3 +-- llvm/lib/Target/MSP430/MSP430RegisterInfo.h | 5 ++--- llvm/lib/Target/Mips/Mips16InstrInfo.cpp | 2 +- llvm/lib/Target/Mips/Mips16RegisterInfo.cpp | 3 ++- llvm/lib/Target/Mips/Mips16RegisterInfo.h | 3 +-- llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 13 ++++++------- llvm/lib/Target/Mips/MipsRegisterInfo.h | 8 +++++--- llvm/lib/Target/Mips/MipsSEInstrInfo.cpp | 2 +- llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 3 ++- llvm/lib/Target/Mips/MipsSERegisterInfo.h | 2 +- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 5 ++--- llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 2 +- llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 3 +-- llvm/lib/Target/Sparc/SparcInstrInfo.cpp | 2 +- llvm/lib/Target/Sparc/SparcRegisterInfo.cpp | 12 ++++++------ llvm/lib/Target/Sparc/SparcRegisterInfo.h | 11 ++++++++--- llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 2 +- 
llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 3 +-- llvm/lib/Target/VE/VERegisterInfo.cpp | 3 +-- llvm/lib/Target/VE/VERegisterInfo.h | 3 +-- .../Target/WebAssembly/WebAssemblyFrameLowering.cpp | 4 ++-- .../Target/WebAssembly/WebAssemblyRegisterInfo.cpp | 10 ++++------ .../Target/WebAssembly/WebAssemblyRegisterInfo.h | 3 +-- llvm/lib/Target/X86/X86RegisterInfo.cpp | 13 ++++++------- llvm/lib/Target/X86/X86RegisterInfo.h | 6 ++++-- 46 files changed, 94 insertions(+), 103 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 73ccc8ed5b11d..3f576b2007137 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -883,7 +883,7 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo { /// If a target supports multiple different pointer register classes, /// kind specifies which one is indicated. virtual const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const { + getPointerRegClass(unsigned Kind = 0) const { llvm_unreachable("Target didn't implement getPointerRegClass!"); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 541269ab6bfce..768e3713f78e2 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1863,7 +1863,7 @@ bool IRTranslator::translateVectorDeinterleave2Intrinsic( void IRTranslator::getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF)); + MRI->setRegClass(DstReg, TRI->getPointerRegClass()); auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {}); diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 79047f732808a..55ec049453607 100644 --- 
a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1003,7 +1003,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, // Assume that all registers in a memory operand are pointers. if (F.isMemKind()) - return TRI->getPointerRegClass(MF); + return TRI->getPointerRegClass(); return nullptr; } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 0d7b128fc736e..f0da03b876d6a 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -67,7 +67,7 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, short RegClass = MCID.operands()[OpNum].RegClass; if (MCID.operands()[OpNum].isLookupPtrRegClass()) - return TRI->getPointerRegClass(MF, RegClass); + return TRI->getPointerRegClass(RegClass); // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 6fdc981fc21a5..10671f09551a4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -574,7 +574,7 @@ bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( // We need to make sure that this one operand does not end up in XZR, thus // require the address to be in a PointerRegClass register. 
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(); SDLoc dl(Op); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); SDValue NewOp = diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 77dfab83a834a..8d167b56e6ca3 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -610,8 +610,7 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF, } const TargetRegisterClass * -AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { +AArch64RegisterInfo::getPointerRegClass(unsigned Kind) const { return &AArch64::GPR64spRegClass; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index 1ed8e959fdd2d..72a7676241770 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -102,8 +102,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override; const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + getPointerRegClass(unsigned Kind = 0) const override; const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 22488384759be..205237fefe785 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1108,8 +1108,8 @@ bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, SIInstrFlags::FlatScratch); } -const TargetRegisterClass *SIRegisterInfo::getPointerRegClass( - const MachineFunction &MF, 
unsigned Kind) const { +const TargetRegisterClass * +SIRegisterInfo::getPointerRegClass(unsigned Kind) const { // This is inaccurate. It depends on the instruction and address space. The // only place where we should hit this is for dealing with frame indexes / // private accesses, so this is correct in that case. diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index eeefef1116aa3..7b91ba7bc581f 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -154,8 +154,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override; - const TargetRegisterClass *getPointerRegClass( - const MachineFunction &MF, unsigned Kind = 0) const override; + const TargetRegisterClass * + getPointerRegClass(unsigned Kind = 0) const override; /// Returns a legal register class to copy a register in the specified class /// to or from. 
If it is possible to copy the register directly without using diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index bc20daf0cfbbc..0d4ecaec1c23e 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -310,8 +310,7 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, } const TargetRegisterClass * -ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) - const { +ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return &ARM::GPRRegClass; } diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 69e10ac2a54d2..5b67b34089d7e 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -91,8 +91,7 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { MCRegister PhysReg) const override; const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + getPointerRegClass(unsigned Kind = 0) const override; const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index ce4ee157289df..4b8c2fd569ead 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -24,7 +24,7 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI) {} + : ARMBaseInstrInfo(STI), RI(STI) {} /// Return the noop instruction to use for a noop. 
MCInst Thumb1InstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index e91441b12fe6f..9dd0e430a0ea1 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -46,7 +46,7 @@ PreferNoCSEL("prefer-no-csel", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI) {} + : ARMBaseInstrInfo(STI), RI(STI) {} /// Return the noop instruction to use for a noop. MCInst Thumb2InstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 911502605c227..12875c233312a 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -35,12 +35,13 @@ extern cl::opt ReuseFrameIndexVals; using namespace llvm; -ThumbRegisterInfo::ThumbRegisterInfo() = default; +ThumbRegisterInfo::ThumbRegisterInfo(const ARMSubtarget &STI) + : IsThumb1Only(STI.isThumb1Only()) {} const TargetRegisterClass * ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const { - if (!MF.getSubtarget().isThumb1Only()) + if (!IsThumb1Only) return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF); if (ARM::tGPRRegClass.hasSubClassEq(RC)) @@ -49,10 +50,9 @@ ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, } const TargetRegisterClass * -ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { - if (!MF.getSubtarget().isThumb1Only()) - return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind); +ThumbRegisterInfo::getPointerRegClass(unsigned Kind) const { + if (!IsThumb1Only) + return ARMBaseRegisterInfo::getPointerRegClass(Kind); return &ARM::tGPRRegClass; } diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/llvm/lib/Target/ARM/ThumbRegisterInfo.h index ccfe211b808a5..1512a09cae200 100644 --- 
a/llvm/lib/Target/ARM/ThumbRegisterInfo.h +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.h @@ -23,16 +23,18 @@ namespace llvm { class ARMBaseInstrInfo; struct ThumbRegisterInfo : public ARMBaseRegisterInfo { +private: + const bool IsThumb1Only; + public: - ThumbRegisterInfo(); + explicit ThumbRegisterInfo(const ARMSubtarget &STI); const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override; const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + getPointerRegClass(unsigned Kind = 0) const override; /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 051affe7110dd..18bea848baeab 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -289,8 +289,7 @@ Register AVRRegisterInfo::getFrameRegister(const MachineFunction &MF) const { } const TargetRegisterClass * -AVRRegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { +AVRRegisterInfo::getPointerRegClass(unsigned Kind) const { // FIXME: Currently we're using avr-gcc as reference, so we restrict // ptrs to Y and Z regs. 
Though avr-gcc has buggy implementation // of memory constraint, so we can fix it and bit avr-gcc here ;-) diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h index 8eb0cf3039bbd..e69696b4d9160 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.h +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h @@ -44,8 +44,7 @@ class AVRRegisterInfo : public AVRGenRegisterInfo { Register getFrameRegister(const MachineFunction &MF) const override; const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + getPointerRegClass(unsigned Kind = 0) const override; /// Splits a 16-bit `DREGS` register into the lo/hi register pair. /// \param Reg A 16-bit register to split. diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2731c523963e5..77ce983d24785 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -444,7 +444,6 @@ bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) } const TargetRegisterClass * -HexagonRegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { +HexagonRegisterInfo::getPointerRegClass(unsigned Kind) const { return &Hexagon::IntRegsRegClass; } diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h index 72153980236e9..945b8608cd948 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -72,8 +72,7 @@ class HexagonRegisterInfo : public HexagonGenRegisterInfo { const TargetRegisterClass *RC) const; const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + getPointerRegClass(unsigned Kind = 0) const override; bool isEHReturnCalleeSaveReg(Register Reg) const; }; diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h 
b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h index d1e40254c2972..53381c28898b8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h @@ -33,8 +33,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override { + getPointerRegClass(unsigned Kind = 0) const override { return &LoongArch::GPRRegClass; } diff --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp index 44596a1527a2d..c1a1e8e83e0d3 100644 --- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -91,8 +91,7 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { } const TargetRegisterClass * -MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) - const { +MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const { return &MSP430::GR16RegClass; } diff --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h index 51e07f4e8e9ea..fbca97361232d 100644 --- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h @@ -28,9 +28,8 @@ class MSP430RegisterInfo : public MSP430GenRegisterInfo { const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; - const TargetRegisterClass* - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + const TargetRegisterClass * + getPointerRegClass(unsigned Kind = 0) const override; bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp index 
cafc11b8a0d9b..5d08f560c3c36 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-instrinfo" Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, Mips::Bimm16) {} + : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {} const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { return RI; diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp index d257f02b2bc6f..66099593b6311 100644 --- a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -28,7 +28,8 @@ using namespace llvm; #define DEBUG_TYPE "mips16-registerinfo" -Mips16RegisterInfo::Mips16RegisterInfo() = default; +Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &STI) + : MipsRegisterInfo(STI) {} bool Mips16RegisterInfo::requiresRegisterScavenging (const MachineFunction &MF) const { diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.h b/llvm/lib/Target/Mips/Mips16RegisterInfo.h index ff115b30162b9..29d08b4003ed4 100644 --- a/llvm/lib/Target/Mips/Mips16RegisterInfo.h +++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.h @@ -16,10 +16,9 @@ #include "MipsRegisterInfo.h" namespace llvm { - class Mips16RegisterInfo : public MipsRegisterInfo { public: - Mips16RegisterInfo(); + explicit Mips16RegisterInfo(const MipsSubtarget &STI); bool requiresRegisterScavenging(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 539288e8da592..4d105bddd4d9c 100644 --- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -37,27 +37,26 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "MipsGenRegisterInfo.inc" -MipsRegisterInfo::MipsRegisterInfo() : MipsGenRegisterInfo(Mips::RA) { +MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &STI) + 
: MipsGenRegisterInfo(Mips::RA), ArePtrs64bit(STI.getABI().ArePtrs64bit()) { MIPS_MC::initLLVMToCVRegMapping(this); } unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } const TargetRegisterClass * -MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { - MipsABIInfo ABI = MF.getSubtarget().getABI(); +MipsRegisterInfo::getPointerRegClass(unsigned Kind) const { MipsPtrClass PtrClassKind = static_cast(Kind); switch (PtrClassKind) { case MipsPtrClass::Default: - return ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; + return ArePtrs64bit ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; case MipsPtrClass::GPR16MM: return &Mips::GPRMM16RegClass; case MipsPtrClass::StackPointer: - return ABI.ArePtrs64bit() ? &Mips::SP64RegClass : &Mips::SP32RegClass; + return ArePtrs64bit ? &Mips::SP64RegClass : &Mips::SP32RegClass; case MipsPtrClass::GlobalPointer: - return ABI.ArePtrs64bit() ? &Mips::GP64RegClass : &Mips::GP32RegClass; + return ArePtrs64bit ? &Mips::GP64RegClass : &Mips::GP32RegClass; } llvm_unreachable("Unknown pointer kind"); diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.h b/llvm/lib/Target/Mips/MipsRegisterInfo.h index b002f4cf3ae7a..dbdb0501998bf 100644 --- a/llvm/lib/Target/Mips/MipsRegisterInfo.h +++ b/llvm/lib/Target/Mips/MipsRegisterInfo.h @@ -25,6 +25,9 @@ namespace llvm { class TargetRegisterClass; class MipsRegisterInfo : public MipsGenRegisterInfo { +private: + const bool ArePtrs64bit; + public: enum class MipsPtrClass { /// The default register class for integer values. @@ -38,14 +41,13 @@ class MipsRegisterInfo : public MipsGenRegisterInfo { GlobalPointer = 3, }; - MipsRegisterInfo(); + explicit MipsRegisterInfo(const MipsSubtarget &STI); /// Get PIC indirect call register static unsigned getPICCallReg(); /// Code Generation virtual methods... 
- const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const override; + const TargetRegisterClass *getPointerRegClass(unsigned Kind) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index caa20f72aacf9..9f00369d8998a 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -28,7 +28,7 @@ static unsigned getUnconditionalBranch(const MipsSubtarget &STI) { } MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI() {} + : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI(STI) {} const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { return RI; diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index feb2b3d2010b4..1326878f7e17e 100644 --- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -34,7 +34,8 @@ using namespace llvm; #define DEBUG_TYPE "mips-reg-info" -MipsSERegisterInfo::MipsSERegisterInfo() = default; +MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &STI) + : MipsRegisterInfo(STI) {} bool MipsSERegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.h b/llvm/lib/Target/Mips/MipsSERegisterInfo.h index cc8496e0268be..93de2c778063a 100644 --- a/llvm/lib/Target/Mips/MipsSERegisterInfo.h +++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.h @@ -20,7 +20,7 @@ namespace llvm { class MipsSERegisterInfo : public MipsRegisterInfo { public: - MipsSERegisterInfo(); + explicit MipsSERegisterInfo(const MipsSubtarget &STI); bool requiresRegisterScavenging(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp 
b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 415164fc9e2cb..89165fa8f8fdb 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -401,7 +401,7 @@ namespace { // We need to make sure that this one operand does not end up in r0 // (because we might end up lowering this as 0(%op)). const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(/*Kind=*/1); SDLoc dl(Op); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); SDValue NewOp = diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index f1230407b1649..366bc73ac52f3 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -164,8 +164,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * -PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) - const { +PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { // Note that PPCInstrInfo::foldImmediate also directly uses this Kind value // when it checks for ZERO folding. 
if (Kind == 1) { @@ -2022,7 +2021,7 @@ Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetRegisterClass *RC = getPointerRegClass(MF); + const TargetRegisterClass *RC = getPointerRegClass(); Register BaseReg = MRI.createVirtualRegister(RC); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 849f856b5419e..560690208f704 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -79,7 +79,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override; + getPointerRegClass(unsigned Kind = 0) const override; const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 2810139bf52ea..67726db504122 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -123,8 +123,7 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { } const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override { + getPointerRegClass(unsigned Kind = 0) const override { return &RISCV::GPRRegClass; } diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index cd0f649912980..e28f4457263f4 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -38,7 +38,7 @@ static cl::opt void 
SparcInstrInfo::anchor() {} SparcInstrInfo::SparcInstrInfo(const SparcSubtarget &ST) - : SparcGenInstrInfo(ST, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(), + : SparcGenInstrInfo(ST, SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(ST), Subtarget(ST) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp index e4db27a63076d..0a14746f587bb 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -31,7 +31,8 @@ static cl::opt ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false), cl::desc("Reserve application registers (%g2-%g4)")); -SparcRegisterInfo::SparcRegisterInfo() : SparcGenRegisterInfo(SP::O7) {} +SparcRegisterInfo::SparcRegisterInfo(const SparcSubtarget &STI) + : SparcGenRegisterInfo(SP::O7), Is64Bit(STI.is64Bit()) {} const MCPhysReg* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -111,11 +112,10 @@ bool SparcRegisterInfo::isReservedReg(const MachineFunction &MF, return getReservedRegs(MF)[Reg]; } -const TargetRegisterClass* -SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { - const SparcSubtarget &Subtarget = MF.getSubtarget(); - return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass; +const TargetRegisterClass * +SparcRegisterInfo::getPointerRegClass(unsigned Kind) const { + assert(Kind == 0 && "this should only be used for default cases"); + return Is64Bit ? 
&SP::I64RegsRegClass : &SP::IntRegsRegClass; } static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/llvm/lib/Target/Sparc/SparcRegisterInfo.h index eae859ce1a519..abd8baeff56a2 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.h +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.h @@ -19,8 +19,14 @@ #include "SparcGenRegisterInfo.inc" namespace llvm { +class SparcSubtarget; + struct SparcRegisterInfo : public SparcGenRegisterInfo { - SparcRegisterInfo(); +private: + const bool Is64Bit; + +public: + explicit SparcRegisterInfo(const SparcSubtarget &STI); /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; @@ -32,8 +38,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const override; bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const; - const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const override; + const TargetRegisterClass *getPointerRegClass(unsigned Kind) const override; bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 6f146b67f8566..a05fdc74e6366 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1851,7 +1851,7 @@ bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand( if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) { const TargetRegisterClass *TRC = - Subtarget->getRegisterInfo()->getPointerRegClass(*MF); + Subtarget->getRegisterInfo()->getPointerRegClass(); SDLoc DL(Base); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32); diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 
460be432811a4..b1de145db3d31 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -135,8 +135,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { /// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0 /// register, hence ADDR64. const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind=0) const override { + getPointerRegClass(unsigned Kind = 0) const override { return &SystemZ::ADDR64BitRegClass; } diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp index f381b7d321598..99e1f61c088eb 100644 --- a/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -93,8 +93,7 @@ BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const { } const TargetRegisterClass * -VERegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { +VERegisterInfo::getPointerRegClass(unsigned Kind) const { return &VE::I64RegClass; } diff --git a/llvm/lib/Target/VE/VERegisterInfo.h b/llvm/lib/Target/VE/VERegisterInfo.h index 3f6feedf42534..999dc856c9bd5 100644 --- a/llvm/lib/Target/VE/VERegisterInfo.h +++ b/llvm/lib/Target/VE/VERegisterInfo.h @@ -31,8 +31,7 @@ struct VERegisterInfo : public VEGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const override; - const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const override; + const TargetRegisterClass *getPointerRegClass(unsigned Kind) const override; bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 2f36e26066d81..27f7e1ada1250 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -278,7 +278,7 @@ 
void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, DebugLoc DL; const TargetRegisterClass *PtrRC = - MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + MRI.getTargetRegisterInfo()->getPointerRegClass(); unsigned SPReg = getSPReg(MF); if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); @@ -349,7 +349,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, SPReg = FI->getBasePointerVreg(); } else if (StackSize) { const TargetRegisterClass *PtrRC = - MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + MRI.getTargetRegisterInfo()->getPointerRegClass(); Register OffsetReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg) .addImm(StackSize); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 18886ba570681..ebb5f555df67a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -117,7 +117,7 @@ bool WebAssemblyRegisterInfo::eliminateFrameIndex( if (FrameOffset) { // Create i32/64.add SP, offset and make it the operand. const TargetRegisterClass *PtrRC = - MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + MRI.getTargetRegisterInfo()->getPointerRegClass(); Register OffsetOp = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssemblyFrameLowering::getOpcConst(MF)), @@ -149,10 +149,8 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { } const TargetRegisterClass * -WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { +WebAssemblyRegisterInfo::getPointerRegClass(unsigned Kind) const { assert(Kind == 0 && "Only one kind of pointer on WebAssembly"); - if (MF.getSubtarget().hasAddr64()) - return &WebAssembly::I64RegClass; - return &WebAssembly::I32RegClass; + return TT.getArch() == Triple::wasm64 ? 
&WebAssembly::I64RegClass + : &WebAssembly::I32RegClass; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h index d875e4b93603b..3a73ff6b1b3b0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -42,8 +42,7 @@ class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo { Register getFrameRegister(const MachineFunction &MF) const override; const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + getPointerRegClass(unsigned Kind = 0) const override; // This does not apply to wasm. const uint32_t *getNoPreservedMask() const override { return nullptr; } }; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 7963dc1b755c9..c47bb3e67e625 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -61,6 +61,7 @@ X86RegisterInfo::X86RegisterInfo(const Triple &TT) // Cache some information. Is64Bit = TT.isArch64Bit(); + IsTarget64BitLP64 = Is64Bit && !TT.isX32(); IsWin64 = Is64Bit && TT.isOSWindows(); IsUEFI64 = Is64Bit && TT.isUEFI(); @@ -192,13 +193,11 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, } const TargetRegisterClass * -X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { - const X86Subtarget &Subtarget = MF.getSubtarget(); +X86RegisterInfo::getPointerRegClass(unsigned Kind) const { switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. 
- if (Subtarget.isTarget64BitLP64()) + if (IsTarget64BitLP64) return &X86::GR64RegClass; // If the target is 64bit but we have been told to use 32bit addresses, // we can still use 64-bit register as long as we know the high bits @@ -206,16 +205,16 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, // Reflect that in the returned register class. return Is64Bit ? &X86::LOW32_ADDR_ACCESSRegClass : &X86::GR32RegClass; case 1: // Normal GPRs except the stack pointer (for encoding reasons). - if (Subtarget.isTarget64BitLP64()) + if (IsTarget64BitLP64) return &X86::GR64_NOSPRegClass; // NOSP does not contain RIP, so no special case here. return &X86::GR32_NOSPRegClass; case 2: // NOREX GPRs. - if (Subtarget.isTarget64BitLP64()) + if (IsTarget64BitLP64) return &X86::GR64_NOREXRegClass; return &X86::GR32_NOREXRegClass; case 3: // NOREX GPRs except the stack pointer (for encoding reasons). - if (Subtarget.isTarget64BitLP64()) + if (IsTarget64BitLP64) return &X86::GR64_NOREX_NOSPRegClass; // NOSP does not contain RIP, so no special case here. return &X86::GR32_NOREX_NOSPRegClass; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index d022e5ab87945..e646591663aca 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -28,6 +28,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo { /// bool Is64Bit; + /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? + bool IsTarget64BitLP64; + /// IsWin64 - Is the target on of win64 flavours /// bool IsWin64; @@ -78,8 +81,7 @@ class X86RegisterInfo final : public X86GenRegisterInfo { /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. 
const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; + getPointerRegClass(unsigned Kind = 0) const override; /// getCrossCopyRegClass - Returns a legal register class to copy a register /// in the specified class to or from. Returns NULL if it is possible to copy From ccaeebcd04de4aa908c7101375e5834087cec330 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 12 Sep 2025 10:23:57 +0100 Subject: [PATCH 100/734] [AArch64][SME] Improve codegen for aarch64.sme.cnts* when not in streaming mode (#154761) Builtins for reading the streaming vector length are canonicalised to use the aarch64.sme.cntsd intrinisic and a multiply, i.e. - cntsb -> cntsd * 8 - cntsh -> cntsd * 4 - cntsw -> cntsd * 2 This patch also removes the LLVM intrinsics for cnts[b,h,w], and adds patterns to improve codegen when cntsd is multiplied by a constant. --- clang/include/clang/Basic/arm_sme.td | 14 +--- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 26 +++++- .../AArch64/sme-intrinsics/acle_sme_cnt.c | 42 +++++----- llvm/include/llvm/IR/IntrinsicsAArch64.td | 9 +-- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 18 +++++ .../Target/AArch64/AArch64ISelLowering.cpp | 16 +--- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 5 ++ .../AArch64/AArch64TargetTransformInfo.cpp | 18 ++--- .../CodeGen/AArch64/sme-intrinsics-rdsvl.ll | 79 ++++++++++++++----- .../sme-streaming-interface-remarks.ll | 4 +- .../AArch64/sme-streaming-interface.ll | 7 +- .../sme-intrinsic-opts-counting-elems.ll | 45 ----------- .../Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td | 3 - .../include/mlir/Dialect/ArmSME/Utils/Utils.h | 3 + .../Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp | 34 +++----- mlir/lib/Dialect/ArmSME/IR/Utils.cpp | 15 ++++ .../ArmSMEToLLVM/arm-sme-to-llvm.mlir | 17 ++-- mlir/test/Target/LLVMIR/arm-sme-invalid.mlir | 2 +- mlir/test/Target/LLVMIR/arm-sme.mlir | 6 -- 19 files changed, 195 insertions(+), 168 deletions(-) diff --git 
a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index a4eb92e76968c..5f6a6eaab80a3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -156,16 +156,10 @@ let SMETargetGuard = "sme2p1" in { //////////////////////////////////////////////////////////////////////////////// // SME - Counting elements in a streaming vector -multiclass ZACount { - def NAME : SInst<"sv" # n_suffix, "nv", "", MergeNone, - "aarch64_sme_" # n_suffix, - [IsOverloadNone, IsStreamingCompatible]>; -} - -defm SVCNTSB : ZACount<"cntsb">; -defm SVCNTSH : ZACount<"cntsh">; -defm SVCNTSW : ZACount<"cntsw">; -defm SVCNTSD : ZACount<"cntsd">; +def SVCNTSB : SInst<"svcntsb", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTSH : SInst<"svcntsh", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTSW : SInst<"svcntsw", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTSD : SInst<"svcntsd", "nv", "", MergeNone, "aarch64_sme_cntsd", [IsOverloadNone, IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // SME - ADDHA/ADDVA diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index f4baf8c7f0dde..82b71e398dcc9 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4304,9 +4304,11 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, // size in bytes. 
if (Ops.size() == 5) { Function *StreamingVectorLength = - CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd); llvm::Value *StreamingVectorLengthCall = - Builder.CreateCall(StreamingVectorLength); + Builder.CreateMul(Builder.CreateCall(StreamingVectorLength), + llvm::ConstantInt::get(Int64Ty, 8), "svl", + /* HasNUW */ true, /* HasNSW */ true); llvm::Value *Mulvl = Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl"); // The type of the ptr parameter is void *, so use Int8Ty here. @@ -4918,6 +4920,26 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, // Handle builtins which require their multi-vector operands to be swapped swapCommutativeSMEOperands(BuiltinID, Ops); + auto isCntsBuiltin = [&]() { + switch (BuiltinID) { + default: + return 0; + case SME::BI__builtin_sme_svcntsb: + return 8; + case SME::BI__builtin_sme_svcntsh: + return 4; + case SME::BI__builtin_sme_svcntsw: + return 2; + } + }; + + if (auto Mul = isCntsBuiltin()) { + llvm::Value *Cntd = + Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd)); + return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul), + "mulsvl", /* HasNUW */ true, /* HasNSW */ true); + } + // Should not happen! 
if (Builtin->LLVMIntrinsic == 0) return nullptr; diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c index c0b3e1a06b0ff..049c1742e5a9d 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c @@ -6,49 +6,55 @@ #include -// CHECK-C-LABEL: define dso_local i64 @test_svcntsb( +// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsb( // CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-C-NEXT: entry: -// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() -// CHECK-C-NEXT: ret i64 [[TMP0]] +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +// CHECK-C-NEXT: ret i64 [[MULSVL]] // -// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntsbv( +// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntsbv( // CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() -// CHECK-CXX-NEXT: ret i64 [[TMP0]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +// CHECK-CXX-NEXT: ret i64 [[MULSVL]] // uint64_t test_svcntsb() { return svcntsb(); } -// CHECK-C-LABEL: define dso_local i64 @test_svcntsh( +// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsh( // CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: -// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh() -// CHECK-C-NEXT: ret i64 [[TMP0]] +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +// CHECK-C-NEXT: ret i64 [[MULSVL]] // -// 
CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntshv( +// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntshv( // CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh() -// CHECK-CXX-NEXT: ret i64 [[TMP0]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +// CHECK-CXX-NEXT: ret i64 [[MULSVL]] // uint64_t test_svcntsh() { return svcntsh(); } -// CHECK-C-LABEL: define dso_local i64 @test_svcntsw( +// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsw( // CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: -// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw() -// CHECK-C-NEXT: ret i64 [[TMP0]] +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 1 +// CHECK-C-NEXT: ret i64 [[MULSVL]] // -// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntswv( +// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntswv( // CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw() -// CHECK-CXX-NEXT: ret i64 [[TMP0]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 1 +// CHECK-CXX-NEXT: ret i64 [[MULSVL]] // uint64_t test_svcntsw() { return svcntsw(); diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 6d53bf8b172d8..7c9aef52b3acf 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3147,13 +3147,8 @@ let TargetPrefix = "aarch64" in { // Counting 
elements // - class AdvSIMD_SME_CNTSB_Intrinsic - : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>; - - def int_aarch64_sme_cntsb : AdvSIMD_SME_CNTSB_Intrinsic; - def int_aarch64_sme_cntsh : AdvSIMD_SME_CNTSB_Intrinsic; - def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic; - def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_cntsd + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>; // // PSTATE Functions diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 10671f09551a4..235dbc41c4bef 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -71,6 +71,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { template bool SelectRDVLImm(SDValue N, SDValue &Imm); + template + bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm); + bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); @@ -938,6 +941,21 @@ bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { return false; } +// Returns a suitable RDSVL multiplier from a left shift. +template +bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) { + if (!isa(N)) + return false; + + int64_t MulImm = 1 << cast(N)->getSExtValue(); + if (MulImm >= Low && MulImm <= High) { + Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32); + return true; + } + + return false; +} + /// SelectArithExtendedRegister - Select a "extended register" operand. This /// operand folds in an extend followed by an optional left shift. 
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f970f71ecb89c..c9a756da0078d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6392,25 +6392,11 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_clz: return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, DL, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sme_cntsb: - return DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(), - DAG.getConstant(1, DL, MVT::i32)); - case Intrinsic::aarch64_sme_cntsh: { - SDValue One = DAG.getConstant(1, DL, MVT::i32); - SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(), One); - return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes, One); - } - case Intrinsic::aarch64_sme_cntsw: { - SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(), - DAG.getConstant(1, DL, MVT::i32)); - return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes, - DAG.getConstant(2, DL, MVT::i32)); - } case Intrinsic::aarch64_sme_cntsd: { SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(), DAG.getConstant(1, DL, MVT::i32)); return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes, - DAG.getConstant(3, DL, MVT::i32)); + DAG.getConstant(3, DL, MVT::i32), SDNodeFlags::Exact); } case Intrinsic::aarch64_sve_cnt: { SDValue Data = Op.getOperand(3); diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 601dc34d74b9c..2d2d81a29526b 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -134,11 +134,16 @@ def : Pat<(AArch64_sme_state_alloc), (SMEStateAllocPseudo)>; def SDT_AArch64RDSVL : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>; 
def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>; +def sme_rdsvl_shl_imm : ComplexPattern">; + let Predicates = [HasSMEandIsNonStreamingSafe] in { def RDSVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>; def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>; def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>; +def : Pat<(i64 (shl (AArch64rdsvl (i32 1)), (sme_rdsvl_shl_imm i64:$imm))), + (RDSVLI_XI (!cast("trunc_imm") $imm))>; + def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 92321a76dbd80..b2d9e1e63f207 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2103,15 +2103,15 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) { } static std::optional -instCombineSMECntsElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts, - const AArch64Subtarget *ST) { +instCombineSMECntsd(InstCombiner &IC, IntrinsicInst &II, + const AArch64Subtarget *ST) { if (!ST->isStreaming()) return std::nullopt; - // In streaming-mode, aarch64_sme_cnts is equivalent to aarch64_sve_cnt + // In streaming-mode, aarch64_sme_cntds is equivalent to aarch64_sve_cntd // with SVEPredPattern::all - Value *Cnt = IC.Builder.CreateElementCount( - II.getType(), ElementCount::getScalable(NumElts)); + Value *Cnt = + IC.Builder.CreateElementCount(II.getType(), ElementCount::getScalable(2)); Cnt->takeName(&II); return IC.replaceInstUsesWith(II, Cnt); } @@ -2826,13 +2826,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_cntb: return instCombineSVECntElts(IC, II, 16); case Intrinsic::aarch64_sme_cntsd: - return instCombineSMECntsElts(IC, II, 2, ST); - case Intrinsic::aarch64_sme_cntsw: - return instCombineSMECntsElts(IC, II, 4, 
ST); - case Intrinsic::aarch64_sme_cntsh: - return instCombineSMECntsElts(IC, II, 8, ST); - case Intrinsic::aarch64_sme_cntsb: - return instCombineSMECntsElts(IC, II, 16, ST); + return instCombineSMECntsd(IC, II, ST); case Intrinsic::aarch64_sve_ptest_any: case Intrinsic::aarch64_sve_ptest_first: case Intrinsic::aarch64_sve_ptest_last: diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll index 5d10d7e13da14..06c53d8070781 100644 --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll @@ -1,46 +1,89 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s -define i64 @sme_cntsb() { -; CHECK-LABEL: sme_cntsb: +define i64 @cntsb() { +; CHECK-LABEL: cntsb: ; CHECK: // %bb.0: ; CHECK-NEXT: rdsvl x0, #1 ; CHECK-NEXT: ret - %v = call i64 @llvm.aarch64.sme.cntsb() - ret i64 %v + %1 = call i64 @llvm.aarch64.sme.cntsd() + %res = shl nuw nsw i64 %1, 3 + ret i64 %res } -define i64 @sme_cntsh() { -; CHECK-LABEL: sme_cntsh: +define i64 @cntsh() { +; CHECK-LABEL: cntsh: ; CHECK: // %bb.0: ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: lsr x0, x8, #1 ; CHECK-NEXT: ret - %v = call i64 @llvm.aarch64.sme.cntsh() - ret i64 %v + %1 = call i64 @llvm.aarch64.sme.cntsd() + %res = shl nuw nsw i64 %1, 2 + ret i64 %res } -define i64 @sme_cntsw() { -; CHECK-LABEL: sme_cntsw: +define i64 @cntsw() { +; CHECK-LABEL: cntsw: ; CHECK: // %bb.0: ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: lsr x0, x8, #2 ; CHECK-NEXT: ret - %v = call i64 @llvm.aarch64.sme.cntsw() - ret i64 %v + %1 = call i64 @llvm.aarch64.sme.cntsd() + %res = shl nuw nsw i64 %1, 1 + ret i64 %res } -define i64 @sme_cntsd() { -; CHECK-LABEL: sme_cntsd: +define i64 @cntsd() { +; CHECK-LABEL: cntsd: ; CHECK: // %bb.0: ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: lsr x0, x8, #3 +; CHECK-NEXT: ret + %res = call i64 
@llvm.aarch64.sme.cntsd() + ret i64 %res +} + +define i64 @sme_cntsb_mul() { +; CHECK-LABEL: sme_cntsb_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x0, #4 +; CHECK-NEXT: ret + %v = call i64 @llvm.aarch64.sme.cntsd() + %shl = shl nuw nsw i64 %v, 3 + %res = mul nuw nsw i64 %shl, 4 + ret i64 %res +} + +define i64 @sme_cntsh_mul() { +; CHECK-LABEL: sme_cntsh_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x0, #4 +; CHECK-NEXT: ret + %v = call i64 @llvm.aarch64.sme.cntsd() + %shl = shl nuw nsw i64 %v, 2 + %res = mul nuw nsw i64 %shl, 8 + ret i64 %res +} + +define i64 @sme_cntsw_mul() { +; CHECK-LABEL: sme_cntsw_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x0, #4 +; CHECK-NEXT: ret + %v = call i64 @llvm.aarch64.sme.cntsd() + %shl = shl nuw nsw i64 %v, 1 + %res = mul nuw nsw i64 %shl, 16 + ret i64 %res +} + +define i64 @sme_cntsd_mul() { +; CHECK-LABEL: sme_cntsd_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x0, #4 ; CHECK-NEXT: ret %v = call i64 @llvm.aarch64.sme.cntsd() - ret i64 %v + %res = mul nuw nsw i64 %v, 32 + ret i64 %res } -declare i64 @llvm.aarch64.sme.cntsb() -declare i64 @llvm.aarch64.sme.cntsh() -declare i64 @llvm.aarch64.sme.cntsw() declare i64 @llvm.aarch64.sme.cntsd() diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll index e1a474d898233..2806f864c7b25 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll @@ -76,14 +76,14 @@ entry: %Data1 = alloca , align 16 %Data2 = alloca , align 16 %Data3 = alloca , align 16 - %0 = tail call i64 @llvm.aarch64.sme.cntsb() + %0 = tail call i64 @llvm.aarch64.sme.cntsd() call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0) %1 = load , ptr %Data1, align 16 %vecext = extractelement %1, i64 0 ret i8 %vecext } -declare i64 @llvm.aarch64.sme.cntsb() +declare i64 @llvm.aarch64.sme.cntsd() declare 
void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef) diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll index 8c4d57e244e03..505a40c16653b 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll @@ -366,9 +366,10 @@ define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: rdsvl x3, #1 +; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: addvl x0, sp, #2 ; CHECK-NEXT: addvl x1, sp, #1 +; CHECK-NEXT: lsr x3, x8, #3 ; CHECK-NEXT: mov x2, sp ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl foo @@ -386,7 +387,7 @@ entry: %Data1 = alloca , align 16 %Data2 = alloca , align 16 %Data3 = alloca , align 16 - %0 = tail call i64 @llvm.aarch64.sme.cntsb() + %0 = tail call i64 @llvm.aarch64.sme.cntsd() call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0) %1 = load , ptr %Data1, align 16 %vecext = extractelement %1, i64 0 @@ -421,7 +422,7 @@ entry: ret void } -declare i64 @llvm.aarch64.sme.cntsb() +declare i64 @llvm.aarch64.sme.cntsd() declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef) declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll b/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll index f213c0b53f6ef..c1d12b825b72c 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sme-intrinsic-opts-counting-elems.ll @@ -5,48 +5,6 @@ target triple = "aarch64-unknown-linux-gnu" -define i64 @cntsb() { 
-; CHECK-LABEL: @cntsb( -; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsb() -; CHECK-NEXT: ret i64 [[OUT]] -; -; CHECK-STREAMING-LABEL: @cntsb( -; CHECK-STREAMING-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-STREAMING-NEXT: [[OUT:%.*]] = shl nuw i64 [[TMP1]], 4 -; CHECK-STREAMING-NEXT: ret i64 [[OUT]] -; - %out = call i64 @llvm.aarch64.sme.cntsb() - ret i64 %out -} - -define i64 @cntsh() { -; CHECK-LABEL: @cntsh( -; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsh() -; CHECK-NEXT: ret i64 [[OUT]] -; -; CHECK-STREAMING-LABEL: @cntsh( -; CHECK-STREAMING-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-STREAMING-NEXT: [[OUT:%.*]] = shl nuw i64 [[TMP1]], 3 -; CHECK-STREAMING-NEXT: ret i64 [[OUT]] -; - %out = call i64 @llvm.aarch64.sme.cntsh() - ret i64 %out -} - -define i64 @cntsw() { -; CHECK-LABEL: @cntsw( -; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsw() -; CHECK-NEXT: ret i64 [[OUT]] -; -; CHECK-STREAMING-LABEL: @cntsw( -; CHECK-STREAMING-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-STREAMING-NEXT: [[OUT:%.*]] = shl nuw i64 [[TMP1]], 2 -; CHECK-STREAMING-NEXT: ret i64 [[OUT]] -; - %out = call i64 @llvm.aarch64.sme.cntsw() - ret i64 %out -} - define i64 @cntsd() { ; CHECK-LABEL: @cntsd( ; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sme.cntsd() @@ -61,8 +19,5 @@ define i64 @cntsd() { ret i64 %out } -declare i64 @llvm.aarch64.sve.cntsb() -declare i64 @llvm.aarch64.sve.cntsh() -declare i64 @llvm.aarch64.sve.cntsw() declare i64 @llvm.aarch64.sve.cntsd() diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td index 06fb8511774e8..4d19fa5415ef0 100644 --- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td @@ -201,9 +201,6 @@ class ArmSME_IntrCountOp /*traits*/[PredOpTrait<"`res` is i64", TypeIsPred<"res", I64>>], /*numResults=*/1, 
/*overloadedResults=*/[]>; -def LLVM_aarch64_sme_cntsb : ArmSME_IntrCountOp<"cntsb">; -def LLVM_aarch64_sme_cntsh : ArmSME_IntrCountOp<"cntsh">; -def LLVM_aarch64_sme_cntsw : ArmSME_IntrCountOp<"cntsw">; def LLVM_aarch64_sme_cntsd : ArmSME_IntrCountOp<"cntsd">; #endif // ARMSME_INTRINSIC_OPS diff --git a/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h b/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h index 1f40eb6fc693c..b57b27de4e1de 100644 --- a/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/ArmSME/Utils/Utils.h @@ -32,6 +32,9 @@ namespace mlir::arm_sme { constexpr unsigned MinStreamingVectorLengthInBits = 128; +/// Return the size represented by arm_sme::TypeSize in bytes. +unsigned getSizeInBytes(TypeSize type); + /// Return minimum number of elements for the given element `type` in /// a vector of SVL bits. unsigned getSMETileSliceMinNumElts(Type type); diff --git a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp index 8a2e3b639aaa7..033e9ae1f4d4c 100644 --- a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp +++ b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp @@ -822,7 +822,7 @@ struct OuterProductWideningOpConversion } }; -/// Lower `arm_sme.streaming_vl` to SME CNTS intrinsics. +/// Lower `arm_sme.streaming_vl` to SME CNTSD intrinsic. 
/// /// Example: /// @@ -830,8 +830,10 @@ struct OuterProductWideningOpConversion /// /// is converted to: /// -/// %cnt = "arm_sme.intr.cntsh"() : () -> i64 -/// %0 = arith.index_cast %cnt : i64 to index +/// %cnt = "arm_sme.intr.cntsd"() : () -> i64 +/// %scale = arith.constant 4 : index +/// %cntIndex = arith.index_cast %cnt : i64 to index +/// %0 = arith.muli %cntIndex, %scale : index /// struct StreamingVLOpConversion : public ConvertArmSMEOpToLLVMPattern Operation * { - switch (streamingVlOp.getTypeSize()) { - case arm_sme::TypeSize::Byte: - return arm_sme::aarch64_sme_cntsb::create(rewriter, loc, i64Type); - case arm_sme::TypeSize::Half: - return arm_sme::aarch64_sme_cntsh::create(rewriter, loc, i64Type); - case arm_sme::TypeSize::Word: - return arm_sme::aarch64_sme_cntsw::create(rewriter, loc, i64Type); - case arm_sme::TypeSize::Double: - return arm_sme::aarch64_sme_cntsd::create(rewriter, loc, i64Type); - } - llvm_unreachable("unknown type size in StreamingVLOpConversion"); - }(); - rewriter.replaceOpWithNewOp( - streamingVlOp, rewriter.getIndexType(), intrOp->getResult(0)); + auto cntsd = arm_sme::aarch64_sme_cntsd::create(rewriter, loc, i64Type); + auto cntsdIdx = arith::IndexCastOp::create(rewriter, loc, + rewriter.getIndexType(), cntsd); + auto scale = arith::ConstantIndexOp::create( + rewriter, loc, + 8 / arm_sme::getSizeInBytes(streamingVlOp.getTypeSize())); + rewriter.replaceOpWithNewOp(streamingVlOp, cntsdIdx, scale); return success(); } }; @@ -964,9 +958,7 @@ void mlir::configureArmSMEToLLVMConversionLegality(ConversionTarget &target) { arm_sme::aarch64_sme_smops_za32, arm_sme::aarch64_sme_umopa_za32, arm_sme::aarch64_sme_umops_za32, arm_sme::aarch64_sme_sumopa_wide, arm_sme::aarch64_sme_sumops_wide, arm_sme::aarch64_sme_usmopa_wide, - arm_sme::aarch64_sme_usmops_wide, arm_sme::aarch64_sme_cntsb, - arm_sme::aarch64_sme_cntsh, arm_sme::aarch64_sme_cntsw, - arm_sme::aarch64_sme_cntsd>(); + arm_sme::aarch64_sme_usmops_wide, 
arm_sme::aarch64_sme_cntsd>(); target.addLegalDialect vec // ----- // CHECK-LABEL: @arm_sme_streaming_vl_bytes -// CHECK: %[[COUNT:.*]] = "arm_sme.intr.cntsb"() : () -> i64 -// CHECK: %[[INDEX_COUNT:.*]] = arith.index_cast %[[COUNT]] : i64 to index -// CHECK: return %[[INDEX_COUNT]] : index +// CHECK: %[[CONST:.*]] = arith.constant 8 : index +// CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64 +// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index +// CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index func.func @arm_sme_streaming_vl_bytes() -> index { %svl_b = arm_sme.streaming_vl return %svl_b : index @@ -597,7 +598,10 @@ func.func @arm_sme_streaming_vl_bytes() -> index { // ----- // CHECK-LABEL: @arm_sme_streaming_vl_half_words -// CHECK: "arm_sme.intr.cntsh"() : () -> i64 +// CHECK: %[[CONST:.*]] = arith.constant 4 : index +// CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64 +// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index +// CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index func.func @arm_sme_streaming_vl_half_words() -> index { %svl_h = arm_sme.streaming_vl return %svl_h : index @@ -606,7 +610,10 @@ func.func @arm_sme_streaming_vl_half_words() -> index { // ----- // CHECK-LABEL: @arm_sme_streaming_vl_words -// CHECK: "arm_sme.intr.cntsw"() : () -> i64 +// CHECK: %[[CONST:.*]] = arith.constant 2 : index +// CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64 +// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index +// CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index func.func @arm_sme_streaming_vl_words() -> index { %svl_w = arm_sme.streaming_vl return %svl_w : index diff --git a/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir b/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir index 14821da838726..6f5b1d8c5d93d 100644 --- a/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir +++ b/mlir/test/Target/LLVMIR/arm-sme-invalid.mlir @@ -36,6 +36,6 
@@ llvm.func @arm_sme_tile_slice_to_vector_invalid_element_types( llvm.func @arm_sme_streaming_vl_invalid_return_type() -> i32 { // expected-error @+1 {{failed to verify that `res` is i64}} - %res = "arm_sme.intr.cntsb"() : () -> i32 + %res = "arm_sme.intr.cntsd"() : () -> i32 llvm.return %res : i32 } diff --git a/mlir/test/Target/LLVMIR/arm-sme.mlir b/mlir/test/Target/LLVMIR/arm-sme.mlir index aedb6730b06bb..0a13a75618a23 100644 --- a/mlir/test/Target/LLVMIR/arm-sme.mlir +++ b/mlir/test/Target/LLVMIR/arm-sme.mlir @@ -419,12 +419,6 @@ llvm.func @arm_sme_tile_slice_to_vector_vert(%tileslice : i32, // ----- llvm.func @arm_sme_streaming_vl() { - // CHECK: call i64 @llvm.aarch64.sme.cntsb() - %svl_b = "arm_sme.intr.cntsb"() : () -> i64 - // CHECK: call i64 @llvm.aarch64.sme.cntsh() - %svl_h = "arm_sme.intr.cntsh"() : () -> i64 - // CHECK: call i64 @llvm.aarch64.sme.cntsw() - %svl_w = "arm_sme.intr.cntsw"() : () -> i64 // CHECK: call i64 @llvm.aarch64.sme.cntsd() %svl_d = "arm_sme.intr.cntsd"() : () -> i64 llvm.return From 4b03252ad6a31db0cfacf75330f41eafe2f027a6 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Fri, 12 Sep 2025 15:00:12 +0530 Subject: [PATCH 101/734] [NFC][AMDGPU][SIMemoryLegalizer] remove effectively empty function (#156806) The removed function SIGfx90ACacheControl::enableLoadCacheBypass() does not actually do anything except one assert and one unreachable. 
--- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 39 -------------------- 1 file changed, 39 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 1637c06936f9b..c501ebba0c7ed 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -462,10 +462,6 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl { SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const override; - bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI, - SIAtomicScope Scope, - SIAtomicAddrSpace AddrSpace) const override; - bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const override; @@ -1375,41 +1371,6 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass( return Changed; } -bool SIGfx90ACacheControl::enableStoreCacheBypass( - const MachineBasicBlock::iterator &MI, - SIAtomicScope Scope, - SIAtomicAddrSpace AddrSpace) const { - assert(!MI->mayLoad() && MI->mayStore()); - bool Changed = false; - - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { - switch (Scope) { - case SIAtomicScope::SYSTEM: - case SIAtomicScope::AGENT: - /// Do not set glc for store atomic operations as they implicitly write - /// through the L1 cache. - break; - case SIAtomicScope::WORKGROUP: - case SIAtomicScope::WAVEFRONT: - case SIAtomicScope::SINGLETHREAD: - // No cache to bypass. Store atomics implicitly write through the L1 - // cache. - break; - default: - llvm_unreachable("Unsupported synchronization scope"); - } - } - - /// The scratch address space does not need the global memory caches - /// to be bypassed as all memory operations by the same thread are - /// sequentially consistent, and no other thread can access scratch - /// memory. - - /// Other address spaces do not have a cache. 
- - return Changed; -} - bool SIGfx90ACacheControl::enableRMWCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, From 3b48c64d0822dfaee98ab85a9299d03f50490f8e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 18:35:57 +0900 Subject: [PATCH 102/734] AMDGPU: Move spill pseudo special case out of adjustAllocatableRegClass (#158246) This is special for the same reason av_mov_b64_imm_pseudo is special. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 +++----- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 6 ++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 5c3340703ba3b..81fc0b4888a73 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5976,8 +5976,7 @@ SIInstrInfo::getWholeWaveFunctionSetup(MachineFunction &MF) const { static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID) { - if (!ST.hasGFX90AInsts() && (((TID.mayLoad() || TID.mayStore()) && - !(TID.TSFlags & SIInstrFlags::Spill)))) { + if (!ST.hasGFX90AInsts() && (TID.mayLoad() || TID.mayStore())) { switch (RCID) { case AMDGPU::AV_32RegClassID: RCID = AMDGPU::VGPR_32RegClassID; @@ -6012,10 +6011,9 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, if (OpNum >= TID.getNumOperands()) return nullptr; auto RegClass = TID.operands()[OpNum].RegClass; - if (TID.getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO) { - // Special pseudos have no alignment requirement + // Special pseudos have no alignment requirement. 
+ if (TID.getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO || isSpill(TID)) return RI.getRegClass(RegClass); - } return adjustAllocatableRegClass(ST, RI, TID, RegClass); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index f7dde2b90b68e..e0373e7768435 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -797,10 +797,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::Spill; } - static bool isSpill(const MachineInstr &MI) { - return MI.getDesc().TSFlags & SIInstrFlags::Spill; + static bool isSpill(const MCInstrDesc &Desc) { + return Desc.TSFlags & SIInstrFlags::Spill; } + static bool isSpill(const MachineInstr &MI) { return isSpill(MI.getDesc()); } + static bool isWWMRegSpillOpcode(uint16_t Opcode) { return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE || Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE || From 14ae5f32f70f965df822de18ca93521ffb820079 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Fri, 12 Sep 2025 14:38:59 +0500 Subject: [PATCH 103/734] [lldb] Allow division by floating point zero in Scalar (#158115) `Scalar` produced an invalid value when detecting any division by zero. This should be only for integer division. 
--- lldb/source/Utility/Scalar.cpp | 5 +++-- lldb/unittests/Utility/ScalarTest.cpp | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp index 7fbe46d46194f..c8766bdf2aee7 100644 --- a/lldb/source/Utility/Scalar.cpp +++ b/lldb/source/Utility/Scalar.cpp @@ -565,12 +565,13 @@ const Scalar lldb_private::operator-(Scalar lhs, Scalar rhs) { const Scalar lldb_private::operator/(Scalar lhs, Scalar rhs) { Scalar result; - if ((result.m_type = Scalar::PromoteToMaxType(lhs, rhs)) != Scalar::e_void && - !rhs.IsZero()) { + if ((result.m_type = Scalar::PromoteToMaxType(lhs, rhs)) != Scalar::e_void) { switch (result.m_type) { case Scalar::e_void: break; case Scalar::e_int: + if (rhs.IsZero()) + break; result.m_integer = lhs.m_integer / rhs.m_integer; return result; case Scalar::e_float: diff --git a/lldb/unittests/Utility/ScalarTest.cpp b/lldb/unittests/Utility/ScalarTest.cpp index 256d456783583..6d5caef42bee4 100644 --- a/lldb/unittests/Utility/ScalarTest.cpp +++ b/lldb/unittests/Utility/ScalarTest.cpp @@ -337,6 +337,12 @@ TEST(ScalarTest, Division) { Scalar r = lhs / rhs; EXPECT_TRUE(r.IsValid()); EXPECT_EQ(r, Scalar(2.5)); + + Scalar inf = Scalar(1) / Scalar(0.0f); + Scalar int0 = Scalar(1) / Scalar(0); + Scalar ref_inf = llvm::APFloat::getInf(llvm::APFloat::IEEEsingle()); + EXPECT_EQ(inf, ref_inf); + EXPECT_FALSE(int0.IsValid()); } TEST(ScalarTest, Promotion) { From 1f49c9494e9a12396a94f36e7e7507304bc83c0e Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 12 Sep 2025 10:39:29 +0100 Subject: [PATCH 104/734] [InstSimplify] Simplify get.active.lane.mask when 2nd arg is zero (#158018) When the second argument passed to the get.active.lane.mask intrinsic is zero we can simplify the instruction to return an all-false mask regardless of the first operand. 
--- llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++++ .../ConstProp/active-lane-mask.ll | 3 +-- .../InstSimplify/get_active_lane_mask.ll | 20 +++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index ebe329aa1d5fe..7bff13d59528c 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6474,6 +6474,10 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, const CallBase *Call) { unsigned BitWidth = ReturnType->getScalarSizeInBits(); switch (IID) { + case Intrinsic::get_active_lane_mask: + if (match(Op1, m_Zero())) + return ConstantInt::getFalse(ReturnType); + break; case Intrinsic::abs: // abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here. // It is always ok to pick the earlier abs. We'll just lose nsw if its only diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll index 9de0c597305b0..e9d9ac040ea1d 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll @@ -354,8 +354,7 @@ entry: define @nxv16i1_constexpr_0() { ; CHECK-LABEL: @nxv16i1_constexpr_0( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 0) -; CHECK-NEXT: ret [[MASK]] +; CHECK-NEXT: ret zeroinitializer ; entry: %mask = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 ptrtoint (ptr @glob to i64), i64 0) diff --git a/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll b/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll new file mode 100644 index 0000000000000..a3b8e4efbe939 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll @@ 
-0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s + +define <4 x i1> @foo_v4i1(i32 %a) { +; CHECK-LABEL: define <4 x i1> @foo_v4i1( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: ret <4 x i1> zeroinitializer +; + %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1(i32 %a, i32 0) + ret <4 x i1> %mask +} + +define @foo_nxv8i1(i32 %a) { +; CHECK-LABEL: define @foo_nxv8i1( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: ret zeroinitializer +; + %mask = call @llvm.get.active.lane.mask.nxv8i1(i32 %a, i32 0) + ret %mask +} From 0d65856584dffafbc417919d8fcb8ad66f9fbf8a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 19:08:57 +0900 Subject: [PATCH 105/734] libclc: Remove HAVE_LLVM version macros (#158257) This doesn't need to pretend to support multiple versions of llvm and these are old anyway. --- libclc/utils/CMakeLists.txt | 4 ---- libclc/utils/prepare-builtins.cpp | 17 ----------------- 2 files changed, 21 deletions(-) diff --git a/libclc/utils/CMakeLists.txt b/libclc/utils/CMakeLists.txt index 6851ae16bda07..a14d133985a64 100644 --- a/libclc/utils/CMakeLists.txt +++ b/libclc/utils/CMakeLists.txt @@ -1,6 +1,3 @@ -# Construct LLVM version define -set( LLVM_VERSION_DEFINE "-DHAVE_LLVM=0x${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}" ) - # Setup prepare_builtins tools set( LLVM_LINK_COMPONENTS BitReader @@ -19,6 +16,5 @@ else() setup_host_tool( prepare_builtins PREPARE_BUILTINS prepare_builtins_exe prepare_builtins_target ) endif() -target_compile_definitions( prepare_builtins PRIVATE ${LLVM_VERSION_DEFINE} ) # These were not properly reported in early LLVM and we don't need them target_compile_options( prepare_builtins PRIVATE -fno-rtti -fno-exceptions ) diff --git a/libclc/utils/prepare-builtins.cpp b/libclc/utils/prepare-builtins.cpp index b10dfccc6d88c..40a5445ef507f 100644 --- a/libclc/utils/prepare-builtins.cpp +++ 
b/libclc/utils/prepare-builtins.cpp @@ -6,12 +6,8 @@ // //===----------------------------------------------------------------------===// -#if HAVE_LLVM > 0x0390 #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" -#else -#include "llvm/Bitcode/ReaderWriter.h" -#endif #include "llvm/Config/llvm-config.h" #include "llvm/IR/Function.h" @@ -62,12 +58,8 @@ int main(int argc, char **argv) { std::unique_ptr &BufferPtr = BufferOrErr.get(); SMDiagnostic Err; std::unique_ptr MPtr = -#if HAVE_LLVM > 0x0390 ExitOnErr(Expected>( parseIR(BufferPtr.get()->getMemBufferRef(), Err, Context))); -#else - parseIR(BufferPtr.get()->getMemBufferRef(), Err, Context); -#endif M = MPtr.release(); } } @@ -106,13 +98,8 @@ int main(int argc, char **argv) { } std::error_code EC; -#if HAVE_LLVM >= 0x0600 std::unique_ptr Out( new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); -#else - std::unique_ptr Out( - new tool_output_file(OutputFilename, EC, sys::fs::OF_None)); -#endif if (EC) { errs() << EC.message() << '\n'; exit(1); @@ -121,11 +108,7 @@ int main(int argc, char **argv) { if (TextualOut) M->print(Out->os(), nullptr, true); else -#if HAVE_LLVM >= 0x0700 WriteBitcodeToFile(*M, Out->os()); -#else - WriteBitcodeToFile(M, Out->os()); -#endif // Declare success. Out->keep(); From 77596b78e5664fff8d272599c0420fc9b87e2c2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Fri, 12 Sep 2025 11:09:06 +0100 Subject: [PATCH 106/734] [mlir][vector] Add a new TD op to wrap unit-dim collapsing patterns (#157507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new TD Op, * `apply_patterns.vector.drop_inner_most_unit_dims_from_xfer_ops`, which wraps the following Vector patterns: * `DropInnerMostUnitDimsTransferRead` * `DropInnerMostUnitDimsTransferWrite` This complements other existing unit-dimension–related patterns. 
To reduce duplication, the `TestVectorTransferCollapseInnerMostContiguousDims` pass has been removed. That pass was only used for testing, and its functionality is now covered by the newly added TD Op. --- .../Vector/TransformOps/VectorTransformOps.td | 14 ++++++++ .../TransformOps/VectorTransformOps.cpp | 5 +++ .../Vector/td/xfer-drop-unit-dims.mlir | 11 +++++++ ...tor-transfer-collapse-inner-most-dims.mlir | 4 ++- .../Dialect/Vector/TestVectorTransforms.cpp | 32 ------------------- 5 files changed, 33 insertions(+), 33 deletions(-) create mode 100644 mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir diff --git a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td index 72a69a056c46e..03d25505dc65c 100644 --- a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td +++ b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td @@ -85,6 +85,20 @@ def ApplyDropUnitDimWithShapeCastPatternsOp : Op]> { + let description = [{ + Apply vector patterns to drop the inner most unit dims from + vector.transfer_read and vector.transfer_write Ops by taking a subview (via + memref.subview) of the original source/destination MemRef. Since it + requires the input/ouptu to be MemRefs, this Op is only helpful + past-bufferization. 
+ }]; + + let assemblyFormat = "attr-dict"; +} + def ApplyTransferPermutationPatternsOp : Op]> { diff --git a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp index 6bb390aa09d3e..18f105ef62e38 100644 --- a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp +++ b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp @@ -88,6 +88,11 @@ void transform::ApplyDropUnitDimWithShapeCastPatternsOp::populatePatterns( vector::populateDropUnitDimWithShapeCastPatterns(patterns); } +void transform::ApplyDropInnerMostUnitDimsFromXferOpsPatternsOp:: + populatePatterns(RewritePatternSet &patterns) { + vector::populateDropInnerMostUnitDimsXferOpPatterns(patterns); +} + void transform::ApplyLowerBitCastPatternsOp::populatePatterns( RewritePatternSet &patterns) { vector::populateVectorBitCastLoweringPatterns(patterns); diff --git a/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir b/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir new file mode 100644 index 0000000000000..5bffa20842b0c --- /dev/null +++ b/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir @@ -0,0 +1,11 @@ +module @transforms attributes { transform.with_named_sequence } { + transform.named_sequence @drop_unit_dims(%module: !transform.any_op {transform.readonly}) { + + %func_op = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.op<"func.func"> + transform.apply_patterns to %func_op { + transform.apply_patterns.vector.drop_inner_most_unit_dims_from_xfer_ops + } : !transform.op<"func.func"> + + transform.yield + } +} diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir index cd56c1bf9695b..18c28799a62e5 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir @@ -1,4 +1,6 @@ -// 
RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s +// RUN: mlir-opt -split-input-file \ +// RUN: -transform-preload-library='transform-library-paths=%p/td/xfer-drop-unit-dims.mlir' \ +// RUN: -transform-interpreter=entry-point=drop_unit_dims %s | FileCheck %s //----------------------------------------------------------------------------- // 1. vector.transfer_read diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index d6596cd341df7..c2d184626818f 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -344,36 +344,6 @@ struct TestVectorTransferOpt } }; -struct TestVectorTransferCollapseInnerMostContiguousDims - : public PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( - TestVectorTransferCollapseInnerMostContiguousDims) - - TestVectorTransferCollapseInnerMostContiguousDims() = default; - TestVectorTransferCollapseInnerMostContiguousDims( - const TestVectorTransferCollapseInnerMostContiguousDims &pass) = default; - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - - StringRef getArgument() const final { - return "test-vector-transfer-collapse-inner-most-dims"; - } - - StringRef getDescription() const final { - return "Test lowering patterns that reduces the rank of the vector " - "transfer memory and vector operands."; - } - - void runOnOperation() override { - RewritePatternSet patterns(&getContext()); - populateDropInnerMostUnitDimsXferOpPatterns(patterns); - (void)applyPatternsGreedily(getOperation(), std::move(patterns)); - } -}; - struct TestVectorSinkPatterns : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorSinkPatterns) @@ -1079,8 +1049,6 @@ void registerTestVectorLowerings() { PassRegistration(); - PassRegistration(); - PassRegistration(); PassRegistration(); From 
8457e68b6b59f8daf5fb747fe3a2f9c48c3c3ba8 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 12 Sep 2025 11:10:57 +0100 Subject: [PATCH 107/734] Introduce LDBG_OS() macro as a variant of LDBG() (#157194) (#158260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also, improve LDBG() to accept debug type and level in any order, and add unit-tests for LDBG() and LGDB_OS(). LDBG_OS() is a macro that behaves like LDBG() but instead of directly using it to stream the output, it takes a callback function that will be called with a raw_ostream. Co-authored-by: Andrzej Warzyński Co-authored-by: Andrzej Warzyński --- llvm/include/llvm/Support/Debug.h | 5 - llvm/include/llvm/Support/DebugLog.h | 279 ++++++++++++++---- llvm/unittests/Support/DebugLogTest.cpp | 129 +++++++- .../lib/Dialect/Transform/IR/TransformOps.cpp | 12 +- 4 files changed, 342 insertions(+), 83 deletions(-) diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h index a7795d403721c..b73f2d7c8b852 100644 --- a/llvm/include/llvm/Support/Debug.h +++ b/llvm/include/llvm/Support/Debug.h @@ -44,11 +44,6 @@ class raw_ostream; /// level, return false. LLVM_ABI bool isCurrentDebugType(const char *Type, int Level = 0); -/// Overload allowing to swap the order of the Type and Level arguments. -LLVM_ABI inline bool isCurrentDebugType(int Level, const char *Type) { - return isCurrentDebugType(Type, Level); -} - /// setCurrentDebugType - Set the current debug type, as if the -debug-only=X /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h index dce706e196bde..f7748bc9904b1 100644 --- a/llvm/include/llvm/Support/DebugLog.h +++ b/llvm/include/llvm/Support/DebugLog.h @@ -19,52 +19,55 @@ namespace llvm { #ifndef NDEBUG -// LDBG() is a macro that can be used as a raw_ostream for debugging. 
-// It will stream the output to the dbgs() stream, with a prefix of the -// debug type and the file and line number. A trailing newline is added to the -// output automatically. If the streamed content contains a newline, the prefix -// is added to each beginning of a new line. Nothing is printed if the debug -// output is not enabled or the debug type does not match. -// -// E.g., -// LDBG() << "Bitset contains: " << Bitset; -// is somehow equivalent to -// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" << -// __LINE__ << " " -// << "Bitset contains: " << Bitset << "\n"); -// +/// LDBG() is a macro that can be used as a raw_ostream for debugging. +/// It will stream the output to the dbgs() stream, with a prefix of the +/// debug type and the file and line number. A trailing newline is added to the +/// output automatically. If the streamed content contains a newline, the prefix +/// is added to each beginning of a new line. Nothing is printed if the debug +/// output is not enabled or the debug type does not match. +/// +/// E.g., +/// LDBG() << "Bitset contains: " << Bitset; +/// is equivalent to +/// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" << +/// __LINE__ << " " +/// << "Bitset contains: " << Bitset << "\n"); +/// // An optional `level` argument can be provided to control the verbosity of the -// output. The default level is 1, and is in increasing level of verbosity. -// -// The `level` argument can be a literal integer, or a macro that evaluates to -// an integer. -// -// An optional `type` argument can be provided to control the debug type. The -// default type is DEBUG_TYPE. The `type` argument can be a literal string, or a -// macro that evaluates to a string. +/// output. The default level is 1, and is in increasing level of verbosity. +/// +/// The `level` argument can be a literal integer, or a macro that evaluates to +/// an integer. 
+/// +/// An optional `type` argument can be provided to control the debug type. The +/// default type is DEBUG_TYPE. The `type` argument can be a literal string, or +/// a macro that evaluates to a string. +/// +/// E.g., +/// LDBG(2) << "Bitset contains: " << Bitset; +/// LDBG("debug_type") << "Bitset contains: " << Bitset; +/// LDBG("debug_type", 2) << "Bitset contains: " << Bitset; #define LDBG(...) _GET_LDBG_MACRO(__VA_ARGS__)(__VA_ARGS__) -// Helper macros to choose the correct macro based on the number of arguments. -#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3 -#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \ - LDBG_FUNC_CHOOSER argsWithParentheses -#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \ - LDBG_FUNC_RECOMPOSER( \ - (__VA_ARGS__, LDBG_LOG_LEVEL_WITH_TYPE, LDBG_LOG_LEVEL, )) -#define LDBG_NO_ARG_EXPANDER() , , LDBG_LOG_LEVEL_1 -#define _GET_LDBG_MACRO(...) \ - LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__()) - -// Dispatch macros to support the `level` argument or none (default to 1) -#define LDBG_LOG_LEVEL(LEVEL) \ - DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), LEVEL, DEBUG_TYPE) -#define LDBG_LOG_LEVEL_1() LDBG_LOG_LEVEL(1) -// This macro is a helper when LDBG() is called with 2 arguments. -// In this case we want to allow the order of the arguments to be swapped. -// We rely on the fact that the `level` argument is an integer, and the `type` -// is a string and dispatch to a C++ API that is overloaded. -#define LDBG_LOG_LEVEL_WITH_TYPE(LEVEL_OR_TYPE, TYPE_OR_LEVEL) \ - DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), (LEVEL_OR_TYPE), (TYPE_OR_LEVEL)) +/// LDBG_OS() is a macro that behaves like LDBG() but instead of directly using +/// it to stream the output, it takes a callback function that will be called +/// with a raw_ostream. +/// This is useful when you need to pass a `raw_ostream` to a helper function to +/// be able to print (when the `<<` operator is not available). 
+/// +/// E.g., +/// LDBG_OS([&] (raw_ostream &Os) { +/// Os << "Pass Manager contains: "; +/// pm.printAsTextual(Os); +/// }); +/// +/// Just like LDBG(), it optionally accepts a `level` and `type` arguments. +/// E.g., +/// LDBG_OS(2, [&] (raw_ostream &Os) { ... }); +/// LDBG_OS("debug_type", [&] (raw_ostream &Os) { ... }); +/// LDBG_OS("debug_type", 2, [&] (raw_ostream &Os) { ... }); +/// +#define LDBG_OS(...) _GET_LDBG_OS_MACRO(__VA_ARGS__)(__VA_ARGS__) // We want the filename without the full path. We are using the __FILE__ macro // and a constexpr function to strip the path prefix. We can avoid the frontend @@ -76,22 +79,167 @@ namespace llvm { #define __LLVM_FILE_NAME__ ::llvm::impl::getShortFileName(__FILE__) #endif -#define DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, \ - LINE) \ - for (bool _c = \ - (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \ +// Everything below are implementation details of the macros above. +namespace impl { + +/// This macro expands to the stream to use for output, we use a macro to allow +/// unit-testing to override. +#define LDBG_STREAM ::llvm::dbgs() + +// ---------------------------------------------------------------------------- +// LDBG() implementation +// ---------------------------------------------------------------------------- + +// Helper macros to choose the correct LDBG() macro based on the number of +// arguments. +#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3 +#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \ + LDBG_FUNC_CHOOSER argsWithParentheses +#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \ + LDBG_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_TYPE_AND_LEVEL, LDBG_LEVEL_OR_TYPE, )) +#define LDBG_NO_ARG_EXPANDER() , , LDBG_NO_ARG +#define _GET_LDBG_MACRO(...) \ + LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__()) + +/// This macro is the core of the LDBG() implementation. 
It is used to print the +/// debug output with the given stream, level, type, file, and line number. +#define LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, \ + TYPE_OR_LEVEL, FILE, LINE) \ + for (bool _c = ::llvm::DebugFlag && ::llvm::impl::ldbgIsCurrentDebugType( \ + TYPE_OR_LEVEL, LEVEL_OR_TYPE); \ _c; _c = false) \ - for (::llvm::impl::raw_ldbg_ostream LdbgOS{ \ - ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)}; \ - _c; _c = false) \ - ::llvm::impl::RAIINewLineStream{LdbgOS}.asLvalue() + ::llvm::impl::raw_ldbg_ostream{ \ + ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \ + (STREAM), /*ShouldPrefixNextString=*/true, \ + /*ShouldEmitNewLineOnDestruction=*/true} \ + .asLvalue() -#define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \ - DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, __LINE__) -#define DEBUGLOG_WITH_STREAM_AND_TYPE(STREAM, LEVEL, TYPE) \ - DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, __LLVM_FILE_NAME__) +/// These macros are helpers to implement LDBG() with an increasing amount of +/// optional arguments made explicit. +#define LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ + FILE) \ + LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ + FILE, __LINE__) +#define LDGB_STREAM_LEVEL_AND_TYPE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL) \ + LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ + __LLVM_FILE_NAME__) +/// This macro is a helper when LDBG() is called with 2 arguments. +/// In this case we want to force the first argument to be the type for +/// consistency in the codebase. +/// We trick this by casting the first argument to a (const char *) which +/// won't compile with an int. +#define LDBG_TYPE_AND_LEVEL(TYPE, LEVEL) \ + LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, static_cast(TYPE), \ + (LEVEL)) -namespace impl { +/// When a single argument is provided. 
This can be either a level or the debug +/// type. If a level is provided, we default the debug type to DEBUG_TYPE, if a +/// string is provided, we default the level to 1. +#define LDBG_LEVEL_OR_TYPE(LEVEL_OR_TYPE) \ + LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, (LEVEL_OR_TYPE), \ + LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE)) +#define LDBG_NO_ARG() LDBG_LEVEL_OR_TYPE(1) + +// ---------------------------------------------------------------------------- +// LDBG_OS() implementation +// ---------------------------------------------------------------------------- + +// Helper macros to choose the correct LDBG_OS() macro based on the number of +// arguments. +#define LDBG_OS_FUNC_CHOOSER(_f1, _f2, _f3, _f4, ...) _f4 +#define LDBG_OS_FUNC_RECOMPOSER(argsWithParentheses) \ + LDBG_OS_FUNC_CHOOSER argsWithParentheses +#define LDBG_OS_CHOOSE_FROM_ARG_COUNT(...) \ + LDBG_OS_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK, \ + LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK, \ + LDBG_OS_CALLBACK, )) +#define LDBG_OS_NO_ARG_EXPANDER() , , , LDBG_OS_CALLBACK +#define _GET_LDBG_OS_MACRO(...) \ + LDBG_OS_CHOOSE_FROM_ARG_COUNT(LDBG_OS_NO_ARG_EXPANDER __VA_ARGS__()) + +/// This macro is the core of the LDBG_OS() macros. It is used to print the +/// debug output with the given stream, level, type, file, and line number. 
+#define LDBG_OS_IMPL(TYPE_OR_LEVEL, LEVEL_OR_TYPE, CALLBACK, STREAM, FILE, \ + LINE) \ + if (::llvm::DebugFlag && \ + ::llvm::impl::ldbgIsCurrentDebugType(TYPE_OR_LEVEL, LEVEL_OR_TYPE)) { \ + ::llvm::impl::raw_ldbg_ostream LdbgOS{ \ + ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \ + (STREAM), /*ShouldPrefixNextString=*/true, \ + /*ShouldEmitNewLineOnDestruction=*/true}; \ + CALLBACK(LdbgOS); \ + } + +#define LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK(TYPE, LEVEL, CALLBACK) \ + LDBG_OS_IMPL(static_cast(TYPE), LEVEL, CALLBACK, LDBG_STREAM, \ + __LLVM_FILE_NAME__, __LINE__) +#define LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(LEVEL_OR_TYPE, CALLBACK) \ + LDBG_OS_IMPL(LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE), LEVEL_OR_TYPE, \ + CALLBACK, LDBG_STREAM, __LLVM_FILE_NAME__, __LINE__) +#define LDBG_OS_CALLBACK(CALLBACK) \ + LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(1, CALLBACK) + +// ---------------------------------------------------------------------------- +// General Helpers for the implementation above +// ---------------------------------------------------------------------------- + +/// Return the stringified macro as a StringRef. +/// Also, strip out potential surrounding quotes: this comes from an artifact of +/// the macro stringification, if DEBUG_TYPE is undefined we get the string +/// "DEBUG_TYPE", however if it is defined we get the string with the quotes. +/// For example if DEBUG_TYPE is "foo", we get "\"foo\"" but we want to return +/// "foo" here. +constexpr ::llvm::StringRef strip_quotes(const char *Str) { + ::llvm::StringRef S(Str); + if (Str[0] == '"' && Str[S.size() - 1] == '"') + return StringRef(Str + 1, S.size() - 2); + return S; +} + +/// Fail compilation if DEBUG_TYPE is not defined. +/// This is a workaround for GCC <=12 which does not support static_assert in +/// templated constexpr functions. 
+#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 12 +#define MISSING_DEBUG_TYPE() \ + extern void missing_DEBUG_TYPE(void); \ + missing_DEBUG_TYPE(); +#else +#define MISSING_DEBUG_TYPE() static_assert(false, "DEBUG_TYPE is not defined"); +#endif + +/// Helper to provide the default level (=1) or type (=DEBUG_TYPE). This is used +/// when a single argument is passed to LDBG() (or LDBG_OS()), if it is an +/// integer we return DEBUG_TYPE and if it is a string we return 1. This fails +/// with a static_assert if we pass an integer and DEBUG_TYPE is not defined. +#define LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE) \ + [](auto LevelOrType) { \ + if constexpr (std::is_integral_v) { \ + constexpr const char *DebugType = LDBG_GET_DEBUG_TYPE_STR(); \ + if constexpr (DebugType[0] == '"') { \ + return ::llvm::impl::strip_quotes(DebugType); \ + } else { \ + MISSING_DEBUG_TYPE(); \ + } \ + } else { \ + return 1; \ + } \ + }(LEVEL_OR_TYPE) + +/// Helpers to get DEBUG_TYPE as a StringRef, even when DEBUG_TYPE is not +/// defined (in which case it expands to "DEBUG_TYPE") +#define LDBG_GET_DEBUG_TYPE_STR__(X) #X +#define LDBG_GET_DEBUG_TYPE_STR_(X) LDBG_GET_DEBUG_TYPE_STR__(X) +#define LDBG_GET_DEBUG_TYPE_STR() LDBG_GET_DEBUG_TYPE_STR_(DEBUG_TYPE) + +/// Helper to call isCurrentDebugType with a StringRef. +static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(StringRef Type, + int Level) { + return ::llvm::isCurrentDebugType(Type.str().c_str(), Level); +} +static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(int Level, + StringRef Type) { + return ::llvm::isCurrentDebugType(Type.str().c_str(), Level); +} /// A raw_ostream that tracks `\n` and print the prefix after each /// newline. @@ -99,6 +247,7 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { std::string Prefix; raw_ostream &Os; bool ShouldPrefixNextString; + bool ShouldEmitNewLineOnDestruction; /// Split the line on newlines and insert the prefix before each /// newline. 
Forward everything to the underlying stream. @@ -131,12 +280,17 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { public: explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os, - bool ShouldPrefixNextString = true) + bool ShouldPrefixNextString = true, + bool ShouldEmitNewLineOnDestruction = false) : Prefix(std::move(Prefix)), Os(Os), - ShouldPrefixNextString(ShouldPrefixNextString) { + ShouldPrefixNextString(ShouldPrefixNextString), + ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction) { SetUnbuffered(); } - ~raw_ldbg_ostream() final {} + ~raw_ldbg_ostream() final { + if (ShouldEmitNewLineOnDestruction) + Os << '\n'; + } /// Forward the current_pos method to the underlying stream. uint64_t current_pos() const final { return Os.tell(); } @@ -173,17 +327,17 @@ getShortFileName(const char *path) { /// "[DebugType] File:Line " /// Where the File is the file name without the path prefix. static LLVM_ATTRIBUTE_UNUSED std::string -computePrefix(const char *DebugType, const char *File, int Line, int Level) { +computePrefix(StringRef DebugType, const char *File, int Line, int Level) { std::string Prefix; raw_string_ostream OsPrefix(Prefix); - if (DebugType) + if (!DebugType.empty()) OsPrefix << "[" << DebugType << ":" << Level << "] "; OsPrefix << File << ":" << Line << " "; return OsPrefix.str(); } /// Overload allowing to swap the order of the DebugType and Level arguments. static LLVM_ATTRIBUTE_UNUSED std::string -computePrefix(int Level, const char *File, int Line, const char *DebugType) { +computePrefix(int Level, const char *File, int Line, StringRef DebugType) { return computePrefix(DebugType, File, Line, Level); } @@ -194,6 +348,7 @@ computePrefix(int Level, const char *File, int Line, const char *DebugType) { #define LDBG(...) \ for (bool _c = false; _c; _c = false) \ ::llvm::nulls() +#define LDBG_OS(...) 
#endif } // end namespace llvm diff --git a/llvm/unittests/Support/DebugLogTest.cpp b/llvm/unittests/Support/DebugLogTest.cpp index e087705b72586..da3851ed86b35 100644 --- a/llvm/unittests/Support/DebugLogTest.cpp +++ b/llvm/unittests/Support/DebugLogTest.cpp @@ -27,7 +27,7 @@ TEST(DebugLogTest, Basic) { { std::string str; raw_string_ostream os(str); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, nullptr) << "NoType"; + LDGB_STREAM_LEVEL_AND_TYPE(os, "", 0) << "NoType"; EXPECT_FALSE(StringRef(os.str()).starts_with('[')); EXPECT_TRUE(StringRef(os.str()).ends_with("NoType\n")); } @@ -36,8 +36,8 @@ TEST(DebugLogTest, Basic) { { std::string str; raw_string_ostream os(str); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A"; - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A"; + LDGB_STREAM_LEVEL_AND_TYPE(os, "B", 0) << "B"; EXPECT_TRUE(StringRef(os.str()).starts_with('[')); EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), HasSubstr("B\n"))); } @@ -48,18 +48,18 @@ TEST(DebugLogTest, Basic) { raw_string_ostream os(str); // Just check that the macro doesn't result in dangling else. 
if (true) - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A"; else - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "B"; - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "B"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << "B"; EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), Not(HasSubstr("B\n")))); int count = 0; auto inc = [&]() { return ++count; }; EXPECT_THAT(count, Eq(0)); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << inc(); + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << inc(); EXPECT_THAT(count, Eq(1)); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << inc(); + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << inc(); EXPECT_THAT(count, Eq(1)); } } @@ -75,7 +75,7 @@ TEST(DebugLogTest, BasicWithLevel) { raw_string_ostream os(str); for (auto type : {"A", "B", "C", "D"}) for (int level : llvm::seq(0, 4)) - DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level) + LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level) << level; EXPECT_EQ(os.str(), "[A:0] A:0 0\n[A:1] A:1 1\n[A:2] A:2 2\n[A:3] A:3 " "3\n[B:0] B:0 0\n[B:1] B:1 1\n[C:0] C:0 0\n"); @@ -92,7 +92,7 @@ TEST(DebugLogTest, NegativeLevel) { raw_string_ostream os(str); for (auto type : {"A", "B"}) for (int level : llvm::seq(0, 2)) - DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level) + LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level) << level; EXPECT_EQ(os.str(), "[A:0] A:0 0\n[B:0] B:0 0\n[B:1] B:1 1\n"); } @@ -128,6 +128,115 @@ TEST(DebugLogTest, DestructorPrefix) { // After destructors, nothing should have been printed. 
EXPECT_EQ(os.str(), ""); } + +TEST(DebugLogTest, LDBG_MACROS) { + llvm::DebugFlag = true; + static const char *DT[] = {"A:3", "B:2"}; + setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0])); + std::string Str; + raw_string_ostream DebugOs(Str); + std::string StrExpected; + raw_string_ostream ExpectedOs(StrExpected); +#undef LDBG_STREAM +#define LDBG_STREAM DebugOs +#define DEBUG_TYPE "A" + LDBG() << "Hello, world!"; + ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a level, no type. + LDBG(2) << "Hello, world!"; + ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + +// Now the type will be explicit, check we don't use DEBUG_TYPE. +#undef DEBUG_TYPE + + // Test with a type + LDBG("B") << "Hello, world!"; + ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type and a level + LDBG("B", 2) << "Hello, world!"; + ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type not enabled. + LDBG("C", 1) << "Hello, world!"; + EXPECT_EQ(DebugOs.str(), ""); + + // Test with a level not enabled. 
+ LDBG("B", 3) << "Hello, world!"; + EXPECT_EQ(DebugOs.str(), ""); +} + +TEST(DebugLogTest, LDBG_OS_MACROS) { + llvm::DebugFlag = true; + static const char *DT[] = {"A:3", "B:2"}; + setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0])); + std::string Str; + raw_string_ostream DebugOs(Str); + std::string StrExpected; + raw_string_ostream ExpectedOs(StrExpected); +#undef LDBG_STREAM +#define LDBG_STREAM DebugOs +#define DEBUG_TYPE "A" + LDBG_OS([](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a level, no type. + LDBG_OS(2, [](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + +// Now the type will be explicit, check we don't use DEBUG_TYPE. +#undef DEBUG_TYPE + + // Test with a type. + LDBG_OS("B", [](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type and a level + LDBG_OS("B", 2, [](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type not enabled. + LDBG_OS("C", 1, [](raw_ostream &Os) { Os << "Hello, world!"; }); + EXPECT_EQ(DebugOs.str(), ""); + + // Test with a level not enabled. 
+ LDBG_OS("B", 3, [](raw_ostream &Os) { Os << "Hello, world!"; }); + EXPECT_EQ(DebugOs.str(), ""); +} + #else TEST(DebugLogTest, Basic) { // LDBG should be compiled out in NDEBUG, so just check it compiles and has diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index aba6178a2ea6c..132ed815c354e 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -1151,7 +1151,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter, std::optional maybeFailure; for (Operation *root : state.getPayloadOps(getRoot())) { WalkResult walkResult = root->walk([&](Operation *op) { - LDBG(1, DEBUG_TYPE_MATCHER) + LDBG(DEBUG_TYPE_MATCHER, 1) << "matching " << OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions()) << " @" << op; @@ -1166,7 +1166,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter, if (diag.isDefiniteFailure()) return WalkResult::interrupt(); if (diag.isSilenceableFailure()) { - LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName() + LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName() << " failed: " << diag.getMessage(); return WalkResult::advance(); } @@ -1298,7 +1298,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter, if (!getRestrictRoot() && op == root) return WalkResult::advance(); - LDBG(1, DEBUG_TYPE_MATCHER) + LDBG(DEBUG_TYPE_MATCHER, 1) << "matching " << OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions()) << " @" << op; @@ -1314,7 +1314,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter, if (diag.isDefiniteFailure()) return WalkResult::interrupt(); if (diag.isSilenceableFailure()) { - LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName() + LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName() << " failed: " << diag.getMessage(); continue; } @@ -2165,10 +2165,10 @@ 
DiagnosedSilenceableFailure transform::MatchOperationEmptyOp::matchOperation( ::std::optional<::mlir::Operation *> maybeCurrent, transform::TransformResults &results, transform::TransformState &state) { if (!maybeCurrent.has_value()) { - LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp success"; + LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp success"; return DiagnosedSilenceableFailure::success(); } - LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp failure"; + LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp failure"; return emitSilenceableError() << "operation is not empty"; } From 83b48b13f3a70bf56053e92593270c519859cfd7 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 12 Sep 2025 10:21:04 +0000 Subject: [PATCH 108/734] [lldb][test] Disable more of TestDAP_attach.py on Windows Flaky on our Windows on Arm bot: https://lab.llvm.org/buildbot/#/builders/141/builds/11465 See #137660 --- lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py index d7d25ca20f85a..d3952e150e125 100644 --- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py +++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py @@ -56,6 +56,7 @@ def test_by_pid(self): self.set_and_hit_breakpoint(continueToExit=True) @skipIfNetBSD # Hangs on NetBSD as well + @skipIfWindows # https://github.com/llvm/llvm-project/issues/137660 def test_by_name(self): """ Tests attaching to a process by process name. From 7289f2cd0c371b2539faa628ec0eea58fa61892c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 12 Sep 2025 19:22:02 +0900 Subject: [PATCH 109/734] CodeGen: Remove MachineFunction argument from getRegClass (#158188) This is a low level utility to parse the MCInstrInfo and should not depend on the state of the function. 
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 +++--- llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 4 +-- llvm/lib/CodeGen/BreakFalseDeps.cpp | 3 +-- llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 4 +-- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +- llvm/lib/CodeGen/InitUndef.cpp | 2 +- llvm/lib/CodeGen/MachineInstr.cpp | 4 +-- llvm/lib/CodeGen/MachineLICM.cpp | 2 +- llvm/lib/CodeGen/MachineVerifier.cpp | 8 +++--- llvm/lib/CodeGen/RegisterCoalescer.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 3 +-- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 8 +++--- .../SelectionDAG/ScheduleDAGRRList.cpp | 2 +- llvm/lib/CodeGen/TargetInstrInfo.cpp | 5 ++-- .../lib/CodeGen/TwoAddressInstructionPass.cpp | 5 ++-- .../AArch64/AArch64ConditionalCompares.cpp | 8 +++--- .../AArch64DeadRegisterDefinitionsPass.cpp | 3 +-- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 5 ++-- .../Target/AArch64/AArch64MIPeepholeOpt.cpp | 13 +++++----- .../Target/AArch64/AArch64RegisterInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 ++-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 15 ++++++----- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 7 +++--- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 4 +-- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 4 +-- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 2 +- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 8 +++--- llvm/lib/Target/ARM/MLxExpansionPass.cpp | 4 +-- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 3 +-- .../lib/Target/Hexagon/HexagonBitSimplify.cpp | 12 ++++----- .../Target/Hexagon/HexagonFrameLowering.cpp | 4 +-- .../Hexagon/HexagonLoadStoreWidening.cpp | 4 +-- .../Target/Hexagon/HexagonVLIWPacketizer.cpp | 4 +-- .../LoongArchDeadRegisterDefinitions.cpp | 2 +- llvm/lib/Target/Mips/MipsSEInstrInfo.cpp | 4 +-- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 5 ++-- .../RISCV/RISCVDeadRegisterDefinitions.cpp | 2 +- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 10 +++----- .../SystemZ/SystemZHazardRecognizer.cpp | 3 +-- 
.../X86/X86AvoidStoreForwardingBlocks.cpp | 5 ++-- llvm/lib/Target/X86/X86DomainReassignment.cpp | 3 +-- llvm/lib/Target/X86/X86InstrInfo.cpp | 25 +++++++++---------- llvm/lib/Target/X86/X86InstrInfo.h | 3 +-- llvm/lib/Target/X86/X86OptimizeLEAs.cpp | 3 +-- .../X86/X86SpeculativeLoadHardening.cpp | 7 +++--- 45 files changed, 107 insertions(+), 133 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 087affcfd55ce..6a624a7052cdd 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -135,10 +135,9 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { /// Given a machine instruction descriptor, returns the register /// class constraint for OpNum, or NULL. - virtual - const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) const; + virtual const TargetRegisterClass * + getRegClass(const MCInstrDesc &MCID, unsigned OpNum, + const TargetRegisterInfo *TRI) const; /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 755be089709a5..e0f80b0a57f2b 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -395,7 +395,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Note register reference... const TargetRegisterClass *RC = nullptr; if (i < MI.getDesc().getNumOperands()) - RC = TII->getRegClass(MI.getDesc(), i, TRI, MF); + RC = TII->getRegClass(MI.getDesc(), i, TRI); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.emplace(Reg.asMCReg(), RR); } @@ -479,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, // Note register reference... 
const TargetRegisterClass *RC = nullptr; if (i < MI.getDesc().getNumOperands()) - RC = TII->getRegClass(MI.getDesc(), i, TRI, MF); + RC = TII->getRegClass(MI.getDesc(), i, TRI); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.emplace(Reg.asMCReg(), RR); } diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 7eef4a9d12b16..205020af1b30d 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -133,8 +133,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, } // Get the undef operand's register class - const TargetRegisterClass *OpRC = - TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); + const TargetRegisterClass *OpRC = TII->getRegClass(MI->getDesc(), OpIdx, TRI); assert(OpRC && "Not a valid register class"); // If the instruction has a true dependency, we can hide the false depdency diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index e8581f632f8ee..f873616cfedea 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { const TargetRegisterClass *NewRC = nullptr; if (i < MI.getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF); + NewRC = TII->getRegClass(MI.getDesc(), i, TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -316,7 +316,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { const TargetRegisterClass *NewRC = nullptr; if (i < MI.getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF); + NewRC = TII->getRegClass(MI.getDesc(), i, TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. 
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 1ccc549e0ec60..055fdc6ad7213 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -114,7 +114,7 @@ Register llvm::constrainOperandRegClass( // Assume physical registers are properly constrained. assert(Reg.isVirtual() && "PhysReg not implemented"); - const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF); + const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI); // Some of the target independent instructions, like COPY, may not impose any // register class constraints on some of their operands: If it's a use, we can // skip constraining as the instruction defining the register would constrain diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index 500a73be7c0f5..e07e598019709 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -232,7 +232,7 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, MachineOperand &UseMO = MI.getOperand(UseOpIdx); if (UseMO.getReg() == MCRegister::NoRegister) { const TargetRegisterClass *RC = - TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF); + TII->getRegClass(MI.getDesc(), UseOpIdx, TRI); Register NewDest = MRI->createVirtualRegister(RC); // We don't have a way to update dead lanes, so keep track of the // new register so that we avoid querying it later. diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 55ec049453607..2c06c5ad4a5e4 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -976,11 +976,9 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetRegisterInfo *TRI) const { assert(getParent() && "Can't have an MBB reference here!"); assert(getMF() && "Can't have an MF reference here!"); - const MachineFunction &MF = *getMF(); - // Most opcodes have fixed constraints in their MCInstrDesc. 
if (!isInlineAsm()) - return TII->getRegClass(getDesc(), OpIdx, TRI, MF); + return TII->getRegClass(getDesc(), OpIdx, TRI); if (!getOperand(OpIdx).isReg()) return nullptr; diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 286fbfd373b59..4f164e2d53460 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1420,7 +1420,7 @@ MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI, if (NewOpc == 0) return nullptr; const MCInstrDesc &MID = TII->get(NewOpc); MachineFunction &MF = *MI->getMF(); - const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. Register Reg = MRI->createVirtualRegister(RC); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 01703fe09b79a..2b24fe49c970b 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2636,7 +2636,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (MONum < MCID.getNumOperands()) { if (const TargetRegisterClass *DRC = - TII->getRegClass(MCID, MONum, TRI, *MF)) { + TII->getRegClass(MCID, MONum, TRI)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); OS << printReg(Reg, TRI) << " is not a " @@ -2721,11 +2721,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // comply to it. 
if (!isPreISelGenericOpcode(MCID.getOpcode()) && MONum < MCID.getNumOperands() && - TII->getRegClass(MCID, MONum, TRI, *MF)) { + TII->getRegClass(MCID, MONum, TRI)) { report("Virtual register does not match instruction constraint", MO, MONum); OS << "Expect register class " - << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI, *MF)) + << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI)) << " but got nothing\n"; return; } @@ -2752,7 +2752,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (MONum < MCID.getNumOperands()) { if (const TargetRegisterClass *DRC = - TII->getRegClass(MCID, MONum, TRI, *MF)) { + TII->getRegClass(MCID, MONum, TRI)) { if (SubIdx) { const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(RC, *MF); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 514f2f02d6425..b8486f6560c5f 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1374,7 +1374,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); - const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI); if (!DefMI->isImplicitDef()) { if (DstReg.isPhysical()) { Register NewDstReg = DstReg; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 9467ba14cf895..851d445f75fa8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1964,8 +1964,7 @@ Register FastISel::createResultReg(const TargetRegisterClass *RC) { Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, unsigned OpNum) { if (Op.isVirtual()) { - const TargetRegisterClass *RegClass = - TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + const TargetRegisterClass *RegClass = 
TII.getRegClass(II, OpNum, &TRI); if (!MRI.constrainRegClass(Op, RegClass)) { // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 861f76e93f2ce..11bc64c626421 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -125,7 +125,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg, const TargetRegisterClass *RC = nullptr; if (i + II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( - TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF)); + TII->getRegClass(II, i + II.getNumDefs(), TRI)); } if (!UseRC) UseRC = RC; @@ -197,7 +197,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, // register instead of creating a new vreg. Register VRBase; const TargetRegisterClass *RC = - TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + TRI->getAllocatableClass(TII->getRegClass(II, i, TRI)); // Always let the value type influence the used register class. The // constraints on the instruction may be too lax to represent the value // type correctly. For example, a 64-bit float (X86::FR64) can't live in @@ -330,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (II) { const TargetRegisterClass *OpRC = nullptr; if (IIOpNum < II->getNumOperands()) - OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF); + OpRC = TII->getRegClass(*II, IIOpNum, TRI); if (OpRC) { unsigned MinNumRegs = MinRCSize; @@ -409,7 +409,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, Register VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); const TargetRegisterClass *IIRC = - II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) + II ? 
TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI)) : nullptr; const TargetRegisterClass *OpRC = TLI->isTypeLegal(OpVT) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index a570b71ecd28d..f70b6cddcc099 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -340,7 +340,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Idx = RegDefPos.GetIdx(); const MCInstrDesc &Desc = TII->get(Opcode); - const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); assert(RC && "Not a valid register class"); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index f0da03b876d6a..b0009560d3fcb 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -58,10 +58,9 @@ static cl::opt MaxAccumulatorWidth( TargetInstrInfo::~TargetInstrInfo() = default; -const TargetRegisterClass* +const TargetRegisterClass * TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) const { + const TargetRegisterInfo *TRI) const { if (OpNum >= MCID.getNumOperands()) return nullptr; diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 8d94b40a41bea..414e414738b71 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1401,9 +1401,8 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( if (UnfoldMCID.getNumDefs() == 1) { // Unfold the load. 
LLVM_DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); - const TargetRegisterClass *RC = - TRI->getAllocatableClass( - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); + const TargetRegisterClass *RC = TRI->getAllocatableClass( + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI)); Register Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; if (!TII->unfoldMemoryOperand(*MF, MI, Reg, diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 484bc2a4be8fa..cb831963759b5 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -630,7 +630,7 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { const MCInstrDesc &MCID = TII->get(Opc); // Create a dummy virtual register for the SUBS def. Register DestReg = - MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); + MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI)); // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. BuildMI(*Head, Head->end(), TermDL, MCID) .addReg(DestReg, RegState::Define | RegState::Dead) @@ -639,7 +639,7 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { .addImm(0); // SUBS uses the GPR*sp register classes. 
MRI->constrainRegClass(HeadCond[2].getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); + TII->getRegClass(MCID, 1, TRI)); } Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); @@ -686,10 +686,10 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); const MCInstrDesc &MCID = TII->get(Opc); MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), - TII->getRegClass(MCID, 0, TRI, *MF)); + TII->getRegClass(MCID, 0, TRI)); if (CmpMI->getOperand(FirstOp + 1).isReg()) MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); + TII->getRegClass(MCID, 1, TRI)); MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) .add(CmpMI->getOperand(FirstOp)); // Register Rn if (isZBranch) diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 987dfbcdd53e9..75361f5d313c6 100644 --- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -115,7 +115,6 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) { void AArch64DeadRegisterDefinitions::processMachineBasicBlock( MachineBasicBlock &MBB) { - const MachineFunction &MF = *MBB.getParent(); for (MachineInstr &MI : MBB) { if (usesFrameIndex(MI)) { // We need to skip this instruction because while it appears to have a @@ -157,7 +156,7 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); continue; } - const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI); unsigned NewReg; if (RC == nullptr) { LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 
bf3d47ac43607..9a7512b77ecdb 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -10951,9 +10951,8 @@ static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, MRI.getRegClass(NewMI->getOperand(0).getReg())); NewMI->getOperand(I).setReg(Result); } else if (I == ReplaceOprNum) { - MRI.constrainRegClass( - ReplaceReg, - TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent())); + MRI.constrainRegClass(ReplaceReg, + TII->getRegClass(NewMI->getDesc(), I, TRI)); NewMI->getOperand(I).setReg(ReplaceReg); } } diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index fd4ef2aa28f8a..04e76c7abd202 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -594,19 +594,18 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm( // NewDstReg = Opcode.second NewTmpReg Imm1 // Determine register classes for destinations and register operands - MachineFunction *MF = MI.getMF(); const TargetRegisterClass *FirstInstrDstRC = - TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF); + TII->getRegClass(TII->get(Opcode.first), 0, TRI); const TargetRegisterClass *FirstInstrOperandRC = - TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF); + TII->getRegClass(TII->get(Opcode.first), 1, TRI); const TargetRegisterClass *SecondInstrDstRC = (Opcode.first == Opcode.second) ? FirstInstrDstRC - : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF); + : TII->getRegClass(TII->get(Opcode.second), 0, TRI); const TargetRegisterClass *SecondInstrOperandRC = (Opcode.first == Opcode.second) ? 
FirstInstrOperandRC - : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF); + : TII->getRegClass(TII->get(Opcode.second), 1, TRI); // Get old registers destinations and new register destinations Register DstReg = MI.getOperand(0).getReg(); @@ -785,14 +784,14 @@ bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) { } const TargetRegisterClass *DstRC64 = - TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF()); + TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI); const TargetRegisterClass *DstRC32 = TRI->getSubRegisterClass(DstRC64, AArch64::sub_32); assert(DstRC32 && "Destination register class of UBFMXri doesn't have a " "sub_32 subregister class"); const TargetRegisterClass *SrcRC64 = - TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF()); + TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI); const TargetRegisterClass *SrcRC32 = TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32); assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 " diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 8d167b56e6ca3..2b0c8ad0578bc 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -892,7 +892,7 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); - MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); + MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this)); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); BuildMI(*MBB, Ins, DL, MCID, BaseReg) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 5297816ec1f2b..edc4858cbc974 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ 
b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -709,7 +709,7 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const { // Verify the register is compatible with the operand. if (const TargetRegisterClass *OpRC = - TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI, *MF)) { + TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI)) { const TargetRegisterClass *OldRC = MRI->getRegClass(Old.getReg()); const TargetRegisterClass *NewRC = MRI->getRegClass(New->getReg()); unsigned NewSubReg = New->getSubReg(); @@ -2409,8 +2409,7 @@ bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) { unsigned OpIdx = Op - &UseMI->getOperand(0); const MCInstrDesc &InstDesc = UseMI->getDesc(); - const TargetRegisterClass *OpRC = - TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF()); + const TargetRegisterClass *OpRC = TII->getRegClass(InstDesc, OpIdx, TRI); if (!OpRC || !TRI->isVectorSuperClass(OpRC)) return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 81fc0b4888a73..0361868e2c1e8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2598,7 +2598,7 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, const MCInstrDesc &TID = get(NewOpcode); const TargetRegisterClass *NewRC = - RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF)); + RI.getAllocatableClass(getRegClass(TID, 0, &RI)); MRI.setRegClass(DestReg, NewRC); UseMO->setReg(DestReg); @@ -3615,7 +3615,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) { const MCInstrDesc &MovDesc = get(MovOp); - const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0, &RI, *MF); + const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0, &RI); if (Is16Bit) { // We just need to find a correctly sized register class, so the // subregister index compatibility doesn't matter since we're statically @@ -6004,10 +6004,9 @@ 
adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, return RI.getProperlyAlignedRC(RI.getRegClass(RCID)); } -const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, - unsigned OpNum, const TargetRegisterInfo *TRI, - const MachineFunction &MF) - const { +const TargetRegisterClass * +SIInstrInfo::getRegClass(const MCInstrDesc &TID, unsigned OpNum, + const TargetRegisterInfo *TRI) const { if (OpNum >= TID.getNumOperands()) return nullptr; auto RegClass = TID.operands()[OpNum].RegClass; @@ -6752,8 +6751,8 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI, if (moveFlatAddrToVGPR(MI)) return; - const TargetRegisterClass *DeclaredRC = getRegClass( - MI.getDesc(), SAddr->getOperandNo(), &RI, *MI.getParent()->getParent()); + const TargetRegisterClass *DeclaredRC = + getRegClass(MI.getDesc(), SAddr->getOperandNo(), &RI); Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI, DeclaredRC); SAddr->setReg(ToSGPR); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index e0373e7768435..24a20cc9dcf82 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1536,10 +1536,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { /// Return true if this opcode should not be used by codegen. 
bool isAsmOnlyOpcode(int MCOp) const; - const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) - const override; + const TargetRegisterClass * + getRegClass(const MCInstrDesc &TID, unsigned OpNum, + const TargetRegisterInfo *TRI) const override; void fixImplicitOperands(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 69d02e7c2934c..f0d1117664983 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1337,10 +1337,10 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, AMDGPU::OpName::data1); const TargetRegisterClass *DataRC0 = - TII->getRegClass(Write2Opc, Data0Idx, TRI, *MF); + TII->getRegClass(Write2Opc, Data0Idx, TRI); const TargetRegisterClass *DataRC1 = - TII->getRegClass(Write2Opc, Data1Idx, TRI, *MF); + TII->getRegClass(Write2Opc, Data1Idx, TRI); if (unsigned SubReg = Data0->getSubReg()) { DataRC0 = TRI->getMatchingSuperRegClass(MRI->getRegClass(Data0->getReg()), diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 0d4ecaec1c23e..e94220af05a0d 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -707,7 +707,7 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass); - MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx).addImm(Offset); @@ -881,7 +881,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const 
MCInstrDesc &MCID = MI.getDesc(); const TargetRegisterClass *RegClass = - TII.getRegClass(MCID, FIOperandNum, this, *MI.getParent()->getParent()); + TII.getRegClass(MCID, FIOperandNum, this); if (Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(FrameReg))) // Must be addrmode4/6. diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index a8da70eadea5b..138981ad92a87 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -2364,7 +2364,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, break; const MCInstrDesc &MCID = MI.getDesc(); - const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF); + const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI); if (RegClass && !RegClass->contains(ARM::SP)) HasNonSPFrameIndex = true; diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index eea0cb61af2bf..cd4299b7a1a53 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2424,7 +2424,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps( Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); - const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); MRI->constrainRegClass(FirstReg, TRC); MRI->constrainRegClass(SecondReg, TRC); @@ -3014,7 +3014,7 @@ static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, MachineFunction *MF = MI->getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); const MCInstrDesc &MCID = TII->get(MI->getOpcode()); - const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI, *MF); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI); MRI.constrainRegClass(NewBaseReg, TRC); int OldOffset = MI->getOperand(BaseOp + 1).getImm(); @@ -3071,10 +3071,10 @@ static MachineInstr 
*createPostIncLoadStore(MachineInstr *MI, int Offset, const MCInstrDesc &MCID = TII->get(NewOpcode); // Constrain the def register class - const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); MRI.constrainRegClass(NewReg, TRC); // And do the same for the base operand - TRC = TII->getRegClass(MCID, 2, TRI, *MF); + TRC = TII->getRegClass(MCID, 2, TRI); MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC); unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask); diff --git a/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/llvm/lib/Target/ARM/MLxExpansionPass.cpp index 00d8d84654ded..8e1bf1d957400 100644 --- a/llvm/lib/Target/ARM/MLxExpansionPass.cpp +++ b/llvm/lib/Target/ARM/MLxExpansionPass.cpp @@ -283,9 +283,7 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, const MCInstrDesc &MCID1 = TII->get(MulOpc); const MCInstrDesc &MCID2 = TII->get(AddSubOpc); - const MachineFunction &MF = *MI->getParent()->getParent(); - Register TmpReg = - MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI, MF)); + Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 9dd0e430a0ea1..431ce38ad6e99 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -564,8 +564,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, bool isSub = false; MachineFunction &MF = *MI.getParent()->getParent(); - const TargetRegisterClass *RegClass = - TII.getRegClass(Desc, FrameRegIdx, TRI, MF); + const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx, TRI); // Memory operands in inline assembly always use AddrModeT2_i12. 
if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 3b7bd1cd1ba94..52e6b0b083c81 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1751,10 +1751,11 @@ namespace { class BitSimplification : public Transformation { public: BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt, - const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri, - MachineRegisterInfo &mri, MachineFunction &mf) - : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), - MF(mf), BT(bt) {} + const HexagonInstrInfo &hii, + const HexagonRegisterInfo &hri, MachineRegisterInfo &mri, + MachineFunction &mf) + : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), BT(bt) { + } bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; @@ -1797,7 +1798,6 @@ namespace { const HexagonInstrInfo &HII; const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; - MachineFunction &MF; BitTracker &BT; }; @@ -1886,7 +1886,7 @@ bool BitSimplification::matchHalf(unsigned SelfR, bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum) { - auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF); + auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI); auto *RRC = HBS::getFinalVRegClass(R, MRI); return OpRC->hasSubClassEq(RRC); } diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index b3c61e1829bf9..dd343d9fbe79f 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -2225,7 +2225,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, if (!Bad) { // If the addressing mode is ok, check the register class. unsigned OpNum = Load ? 
0 : 2; - auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF); + auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI); RC = getCommonRC(SI.RC, RC); if (RC == nullptr) Bad = true; @@ -2395,7 +2395,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(), SrcOp.getSubReg() }; - auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF); + auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI); // The this-> is needed to unconfuse MSVC. Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC); LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI) diff --git a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp index b8060fb66680f..7cbd81ff227e1 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp @@ -646,7 +646,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, MachineInstr *CombI; if (Acc != 0) { const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); - const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI); Register VReg = MF->getRegInfo().createVirtualRegister(RC); MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc); NG.push_back(TfrI); @@ -677,7 +677,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, } else { // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); - const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI); Register VReg = MF->getRegInfo().createVirtualRegister(RC); MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc)); NG.push_back(TfrI); diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp 
b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index bc486cd562bf4..cb88d1ac4af9f 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -653,7 +653,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, const MCInstrDesc& MCID = PacketMI.getDesc(); // First operand is always the result. - const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI); // Double regs can not feed into new value store: PRM section: 5.4.2.2. if (PacketRC == &Hexagon::DoubleRegsRegClass) return false; @@ -866,7 +866,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, return false; const MCInstrDesc& MCID = PI.getDesc(); - const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); + const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI); if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass) return false; diff --git a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp index 069b181791ac7..0ccebeb393267 100644 --- a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp @@ -86,7 +86,7 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction( continue; LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n "; MI.print(dbgs())); - const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI); if (!(RC && RC->contains(LoongArch::R0))) { LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); continue; diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index 9f00369d8998a..dbdbb179a583d 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ 
b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -682,8 +682,8 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MCInstrDesc &Desc = get(Opc); assert(Desc.NumOperands == 2 && "Unary instruction expected."); const MipsRegisterInfo *RI = &getRegisterInfo(); - unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); - unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); + unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI)); + unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI)); return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 366bc73ac52f3..85b40727ff296 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -2023,7 +2023,7 @@ Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const TargetRegisterClass *RC = getPointerRegClass(); Register BaseReg = MRI.createVirtualRegister(RC); - MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx).addImm(Offset); @@ -2051,8 +2051,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = MI.getDesc(); MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.constrainRegClass(BaseReg, - TII.getRegClass(MCID, FIOperandNum, this, MF)); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, FIOperandNum, this)); } bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp index 1c7aa738f6215..51180f548ca6d 100644 --- 
a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp @@ -89,7 +89,7 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n "; MI.print(dbgs())); Register X0Reg; - const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI); if (RC && RC->contains(RISCV::X0)) { X0Reg = RISCV::X0; } else if (RC && RC->contains(RISCV::X0_W)) { diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 62651185137c9..ffba2843bde1f 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -455,8 +455,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { True->getOperand(1).setReg(MI.getOperand(2).getReg()); // If True is masked then its passthru needs to be in VRNoV0. MRI->constrainRegClass(True->getOperand(1).getReg(), - TII->getRegClass(True->getDesc(), 1, TRI, - *True->getParent()->getParent())); + TII->getRegClass(True->getDesc(), 1, TRI)); } MI.setDesc(TII->get(NewOpc)); @@ -674,10 +673,9 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { SrcPassthru.setReg(Passthru.getReg()); // If Src is masked then its passthru needs to be in VRNoV0. 
if (Passthru.getReg() != RISCV::NoRegister) - MRI->constrainRegClass(Passthru.getReg(), - TII->getRegClass(Src->getDesc(), - SrcPassthru.getOperandNo(), TRI, - *Src->getParent()->getParent())); + MRI->constrainRegClass( + Passthru.getReg(), + TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI)); } if (RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags)) { diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index 34888f44aa221..34d58e05ff3e4 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -115,12 +115,11 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { } bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { - const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); const MCInstrDesc &MID = MI->getDesc(); unsigned Count = 0; for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { - const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); + const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI); if (RC == nullptr) continue; if (OpIdx >= MID.getNumDefs() && diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 3ac7c8222b54b..d2e35277419f7 100644 --- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -388,7 +388,7 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode, MachineMemOperand *SMMO = *StoreInst->memoperands_begin(); Register Reg1 = MRI->createVirtualRegister( - TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent()))); + TII->getRegClass(TII->get(NLoadOpcode), 0, TRI)); MachineInstr *NewLoad = BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode), Reg1) @@ -553,8 +553,7 @@ void 
X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) { } unsigned X86AvoidSFBPass::getRegSizeInBytes(MachineInstr *LoadInst) { - const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI, - *LoadInst->getParent()->getParent()); + const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI); return TRI->getRegSizeInBits(*TRC) / 8; } diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 93e55ca5fabf9..339e2f3b7209e 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -175,8 +175,7 @@ class InstrReplacerDstCOPY : public InstrConverterBase { const DebugLoc &DL = MI->getDebugLoc(); Register Reg = MRI->createVirtualRegister( - TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), - *MBB->getParent())); + TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo())); MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg); for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) Bld.add(MO); diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f109e29c0bff0..58d526269ff3c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -94,9 +94,8 @@ X86InstrInfo::X86InstrInfo(const X86Subtarget &STI) const TargetRegisterClass * X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) const { - auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF); + const TargetRegisterInfo *TRI) const { + auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI); // If the target does not have egpr, then r16-r31 will be resereved for all // instructions. 
if (!RC || !Subtarget.hasEGPR()) @@ -7249,8 +7248,8 @@ static void updateOperandRegConstraints(MachineFunction &MF, if (!Reg.isVirtual()) continue; - auto *NewRC = MRI.constrainRegClass( - Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF)); + auto *NewRC = + MRI.constrainRegClass(Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI)); if (!NewRC) { LLVM_DEBUG( dbgs() << "WARNING: Unable to update register constraint for operand " @@ -7348,7 +7347,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( unsigned SrcIdx = (Imm >> 6) & 3; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI); unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if ((Size == 0 || Size >= 16) && RCSize >= 16 && (MI.getOpcode() != X86::INSERTPSrri || Alignment >= Align(4))) { @@ -7373,7 +7372,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( // TODO: In most cases AVX doesn't have a 8-byte alignment requirement. if (OpNum == 2) { const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI); unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) { unsigned NewOpCode = @@ -7392,7 +7391,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( // table twice. 
if (OpNum == 2) { const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI); unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) { MachineInstr *NewMI = @@ -7527,7 +7526,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( bool NarrowToMOV32rm = false; if (Size) { const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI); unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -8495,7 +8494,7 @@ bool X86InstrInfo::unfoldMemoryOperand( const MCInstrDesc &MCID = get(Opc); - const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // TODO: Check if 32-byte or greater accesses are slow too? if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass && @@ -8606,7 +8605,7 @@ bool X86InstrInfo::unfoldMemoryOperand( // Emit the store instruction. 
if (UnfoldStore) { - const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF); + const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI); auto MMOs = extractStoreMMOs(MI.memoperands(), MF); unsigned Alignment = std::max(TRI.getSpillSize(*DstRC), 16); bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment; @@ -8638,7 +8637,7 @@ bool X86InstrInfo::unfoldMemoryOperand( const MCInstrDesc &MCID = get(Opc); MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); unsigned NumDefs = MCID.NumDefs; std::vector AddrOps; std::vector BeforeOps; @@ -8689,7 +8688,7 @@ bool X86InstrInfo::unfoldMemoryOperand( std::vector VTs; const TargetRegisterClass *DstRC = nullptr; if (MCID.getNumDefs() > 0) { - DstRC = getRegClass(MCID, 0, &RI, MF); + DstRC = getRegClass(MCID, 0, &RI); VTs.push_back(*TRI.legalclasstypes_begin(*DstRC)); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index f087b7f20ff67..86133b3d969b1 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -248,8 +248,7 @@ class X86InstrInfo final : public X86GenInstrInfo { /// GR*_NOREX2RegClass (Returned register class) const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) const override; + const TargetRegisterInfo *TRI) const override; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. 
As /// such, whenever a client has an instance of instruction info, it should diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index 8dcd4b8cf7551..167bed132cd12 100644 --- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -339,7 +339,6 @@ int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First, bool X86OptimizeLEAPass::chooseBestLEA( const SmallVectorImpl &List, const MachineInstr &MI, MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) { - const MachineFunction *MF = MI.getParent()->getParent(); const MCInstrDesc &Desc = MI.getDesc(); int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) + X86II::getOperandBias(Desc); @@ -360,7 +359,7 @@ bool X86OptimizeLEAPass::chooseBestLEA( // example MOV8mr_NOREX. We could constrain the register class of the LEA // def to suit MI, however since this case is very rare and hard to // reproduce in a test it's just more reliable to skip the LEA. - if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) != + if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI) != MRI->getRegClass(DefMI->getOperand(0).getReg())) continue; diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp index c28de14a97874..e0b3b61e29175 100644 --- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -836,13 +836,12 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( /// a way to unfold into a newly created vreg rather than requiring a register /// input. 
static const TargetRegisterClass * -getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, - unsigned Opcode) { +getRegClassForUnfoldedLoad(const X86InstrInfo &TII, unsigned Opcode) { unsigned Index; unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold( Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index); const MCInstrDesc &MCID = TII.get(UnfoldedOpc); - return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF); + return TII.getRegClass(MCID, Index, &TII.getRegisterInfo()); } void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( @@ -898,7 +897,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( // Use the generic unfold logic now that we know we're dealing with // expected instructions. // FIXME: We don't have test coverage for all of these! - auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode()); + auto *UnfoldedRC = getRegClassForUnfoldedLoad(*TII, MI.getOpcode()); if (!UnfoldedRC) { LLVM_DEBUG(dbgs() << "ERROR: Unable to unfold load from instruction:\n"; From 40d8af816c4a7262b198c6d6459da815c45e87d0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 12 Sep 2025 11:38:14 +0100 Subject: [PATCH 110/734] Revert "Introduce LDBG_OS() macro as a variant of LDBG() (#157194)" (#158264) Reverts llvm/llvm-project#158260 second attempt to land this fixed some bots, but left others broken, need an extra iteration! --- llvm/include/llvm/Support/Debug.h | 5 + llvm/include/llvm/Support/DebugLog.h | 279 ++++-------------- llvm/unittests/Support/DebugLogTest.cpp | 129 +------- .../lib/Dialect/Transform/IR/TransformOps.cpp | 12 +- 4 files changed, 83 insertions(+), 342 deletions(-) diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h index b73f2d7c8b852..a7795d403721c 100644 --- a/llvm/include/llvm/Support/Debug.h +++ b/llvm/include/llvm/Support/Debug.h @@ -44,6 +44,11 @@ class raw_ostream; /// level, return false. 
LLVM_ABI bool isCurrentDebugType(const char *Type, int Level = 0); +/// Overload allowing to swap the order of the Type and Level arguments. +LLVM_ABI inline bool isCurrentDebugType(int Level, const char *Type) { + return isCurrentDebugType(Type, Level); +} + /// setCurrentDebugType - Set the current debug type, as if the -debug-only=X /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h index f7748bc9904b1..dce706e196bde 100644 --- a/llvm/include/llvm/Support/DebugLog.h +++ b/llvm/include/llvm/Support/DebugLog.h @@ -19,55 +19,52 @@ namespace llvm { #ifndef NDEBUG -/// LDBG() is a macro that can be used as a raw_ostream for debugging. -/// It will stream the output to the dbgs() stream, with a prefix of the -/// debug type and the file and line number. A trailing newline is added to the -/// output automatically. If the streamed content contains a newline, the prefix -/// is added to each beginning of a new line. Nothing is printed if the debug -/// output is not enabled or the debug type does not match. -/// -/// E.g., -/// LDBG() << "Bitset contains: " << Bitset; -/// is equivalent to -/// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" << -/// __LINE__ << " " -/// << "Bitset contains: " << Bitset << "\n"); -/// +// LDBG() is a macro that can be used as a raw_ostream for debugging. +// It will stream the output to the dbgs() stream, with a prefix of the +// debug type and the file and line number. A trailing newline is added to the +// output automatically. If the streamed content contains a newline, the prefix +// is added to each beginning of a new line. Nothing is printed if the debug +// output is not enabled or the debug type does not match. 
+// +// E.g., +// LDBG() << "Bitset contains: " << Bitset; +// is somehow equivalent to +// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" << +// __LINE__ << " " +// << "Bitset contains: " << Bitset << "\n"); +// // An optional `level` argument can be provided to control the verbosity of the -/// output. The default level is 1, and is in increasing level of verbosity. -/// -/// The `level` argument can be a literal integer, or a macro that evaluates to -/// an integer. -/// -/// An optional `type` argument can be provided to control the debug type. The -/// default type is DEBUG_TYPE. The `type` argument can be a literal string, or -/// a macro that evaluates to a string. -/// -/// E.g., -/// LDBG(2) << "Bitset contains: " << Bitset; -/// LDBG("debug_type") << "Bitset contains: " << Bitset; -/// LDBG("debug_type", 2) << "Bitset contains: " << Bitset; +// output. The default level is 1, and is in increasing level of verbosity. +// +// The `level` argument can be a literal integer, or a macro that evaluates to +// an integer. +// +// An optional `type` argument can be provided to control the debug type. The +// default type is DEBUG_TYPE. The `type` argument can be a literal string, or a +// macro that evaluates to a string. #define LDBG(...) _GET_LDBG_MACRO(__VA_ARGS__)(__VA_ARGS__) -/// LDBG_OS() is a macro that behaves like LDBG() but instead of directly using -/// it to stream the output, it takes a callback function that will be called -/// with a raw_ostream. -/// This is useful when you need to pass a `raw_ostream` to a helper function to -/// be able to print (when the `<<` operator is not available). -/// -/// E.g., -/// LDBG_OS([&] (raw_ostream &Os) { -/// Os << "Pass Manager contains: "; -/// pm.printAsTextual(Os); -/// }); -/// -/// Just like LDBG(), it optionally accepts a `level` and `type` arguments. -/// E.g., -/// LDBG_OS(2, [&] (raw_ostream &Os) { ... }); -/// LDBG_OS("debug_type", [&] (raw_ostream &Os) { ... 
}); -/// LDBG_OS("debug_type", 2, [&] (raw_ostream &Os) { ... }); -/// -#define LDBG_OS(...) _GET_LDBG_OS_MACRO(__VA_ARGS__)(__VA_ARGS__) +// Helper macros to choose the correct macro based on the number of arguments. +#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3 +#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \ + LDBG_FUNC_CHOOSER argsWithParentheses +#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \ + LDBG_FUNC_RECOMPOSER( \ + (__VA_ARGS__, LDBG_LOG_LEVEL_WITH_TYPE, LDBG_LOG_LEVEL, )) +#define LDBG_NO_ARG_EXPANDER() , , LDBG_LOG_LEVEL_1 +#define _GET_LDBG_MACRO(...) \ + LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__()) + +// Dispatch macros to support the `level` argument or none (default to 1) +#define LDBG_LOG_LEVEL(LEVEL) \ + DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), LEVEL, DEBUG_TYPE) +#define LDBG_LOG_LEVEL_1() LDBG_LOG_LEVEL(1) +// This macro is a helper when LDBG() is called with 2 arguments. +// In this case we want to allow the order of the arguments to be swapped. +// We rely on the fact that the `level` argument is an integer, and the `type` +// is a string and dispatch to a C++ API that is overloaded. +#define LDBG_LOG_LEVEL_WITH_TYPE(LEVEL_OR_TYPE, TYPE_OR_LEVEL) \ + DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), (LEVEL_OR_TYPE), (TYPE_OR_LEVEL)) // We want the filename without the full path. We are using the __FILE__ macro // and a constexpr function to strip the path prefix. We can avoid the frontend @@ -79,167 +76,22 @@ namespace llvm { #define __LLVM_FILE_NAME__ ::llvm::impl::getShortFileName(__FILE__) #endif -// Everything below are implementation details of the macros above. -namespace impl { - -/// This macro expands to the stream to use for output, we use a macro to allow -/// unit-testing to override. 
-#define LDBG_STREAM ::llvm::dbgs() - -// ---------------------------------------------------------------------------- -// LDBG() implementation -// ---------------------------------------------------------------------------- - -// Helper macros to choose the correct LDBG() macro based on the number of -// arguments. -#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3 -#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \ - LDBG_FUNC_CHOOSER argsWithParentheses -#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \ - LDBG_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_TYPE_AND_LEVEL, LDBG_LEVEL_OR_TYPE, )) -#define LDBG_NO_ARG_EXPANDER() , , LDBG_NO_ARG -#define _GET_LDBG_MACRO(...) \ - LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__()) - -/// This macro is the core of the LDBG() implementation. It is used to print the -/// debug output with the given stream, level, type, file, and line number. -#define LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, \ - TYPE_OR_LEVEL, FILE, LINE) \ - for (bool _c = ::llvm::DebugFlag && ::llvm::impl::ldbgIsCurrentDebugType( \ - TYPE_OR_LEVEL, LEVEL_OR_TYPE); \ +#define DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, \ + LINE) \ + for (bool _c = \ + (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \ _c; _c = false) \ - ::llvm::impl::raw_ldbg_ostream{ \ - ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \ - (STREAM), /*ShouldPrefixNextString=*/true, \ - /*ShouldEmitNewLineOnDestruction=*/true} \ - .asLvalue() - -/// These macros are helpers to implement LDBG() with an increasing amount of -/// optional arguments made explicit. 
-#define LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ - FILE) \ - LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ - FILE, __LINE__) -#define LDGB_STREAM_LEVEL_AND_TYPE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL) \ - LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ - __LLVM_FILE_NAME__) -/// This macro is a helper when LDBG() is called with 2 arguments. -/// In this case we want to force the first argument to be the type for -/// consistency in the codebase. -/// We trick this by casting the first argument to a (const char *) which -/// won't compile with an int. -#define LDBG_TYPE_AND_LEVEL(TYPE, LEVEL) \ - LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, static_cast(TYPE), \ - (LEVEL)) - -/// When a single argument is provided. This can be either a level or the debug -/// type. If a level is provided, we default the debug type to DEBUG_TYPE, if a -/// string is provided, we default the level to 1. -#define LDBG_LEVEL_OR_TYPE(LEVEL_OR_TYPE) \ - LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, (LEVEL_OR_TYPE), \ - LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE)) -#define LDBG_NO_ARG() LDBG_LEVEL_OR_TYPE(1) - -// ---------------------------------------------------------------------------- -// LDBG_OS() implementation -// ---------------------------------------------------------------------------- - -// Helper macros to choose the correct LDBG_OS() macro based on the number of -// arguments. -#define LDBG_OS_FUNC_CHOOSER(_f1, _f2, _f3, _f4, ...) _f4 -#define LDBG_OS_FUNC_RECOMPOSER(argsWithParentheses) \ - LDBG_OS_FUNC_CHOOSER argsWithParentheses -#define LDBG_OS_CHOOSE_FROM_ARG_COUNT(...) \ - LDBG_OS_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK, \ - LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK, \ - LDBG_OS_CALLBACK, )) -#define LDBG_OS_NO_ARG_EXPANDER() , , , LDBG_OS_CALLBACK -#define _GET_LDBG_OS_MACRO(...) 
\ - LDBG_OS_CHOOSE_FROM_ARG_COUNT(LDBG_OS_NO_ARG_EXPANDER __VA_ARGS__()) - -/// This macro is the core of the LDBG_OS() macros. It is used to print the -/// debug output with the given stream, level, type, file, and line number. -#define LDBG_OS_IMPL(TYPE_OR_LEVEL, LEVEL_OR_TYPE, CALLBACK, STREAM, FILE, \ - LINE) \ - if (::llvm::DebugFlag && \ - ::llvm::impl::ldbgIsCurrentDebugType(TYPE_OR_LEVEL, LEVEL_OR_TYPE)) { \ - ::llvm::impl::raw_ldbg_ostream LdbgOS{ \ - ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \ - (STREAM), /*ShouldPrefixNextString=*/true, \ - /*ShouldEmitNewLineOnDestruction=*/true}; \ - CALLBACK(LdbgOS); \ - } - -#define LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK(TYPE, LEVEL, CALLBACK) \ - LDBG_OS_IMPL(static_cast(TYPE), LEVEL, CALLBACK, LDBG_STREAM, \ - __LLVM_FILE_NAME__, __LINE__) -#define LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(LEVEL_OR_TYPE, CALLBACK) \ - LDBG_OS_IMPL(LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE), LEVEL_OR_TYPE, \ - CALLBACK, LDBG_STREAM, __LLVM_FILE_NAME__, __LINE__) -#define LDBG_OS_CALLBACK(CALLBACK) \ - LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(1, CALLBACK) - -// ---------------------------------------------------------------------------- -// General Helpers for the implementation above -// ---------------------------------------------------------------------------- + for (::llvm::impl::raw_ldbg_ostream LdbgOS{ \ + ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)}; \ + _c; _c = false) \ + ::llvm::impl::RAIINewLineStream{LdbgOS}.asLvalue() -/// Return the stringified macro as a StringRef. -/// Also, strip out potential surrounding quotes: this comes from an artifact of -/// the macro stringification, if DEBUG_TYPE is undefined we get the string -/// "DEBUG_TYPE", however if it is defined we get the string with the quotes. -/// For example if DEBUG_TYPE is "foo", we get "\"foo\"" but we want to return -/// "foo" here. 
-constexpr ::llvm::StringRef strip_quotes(const char *Str) { - ::llvm::StringRef S(Str); - if (Str[0] == '"' && Str[S.size() - 1] == '"') - return StringRef(Str + 1, S.size() - 2); - return S; -} - -/// Fail compilation if DEBUG_TYPE is not defined. -/// This is a workaround for GCC <=12 which does not support static_assert in -/// templated constexpr functions. -#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 12 -#define MISSING_DEBUG_TYPE() \ - extern void missing_DEBUG_TYPE(void); \ - missing_DEBUG_TYPE(); -#else -#define MISSING_DEBUG_TYPE() static_assert(false, "DEBUG_TYPE is not defined"); -#endif - -/// Helper to provide the default level (=1) or type (=DEBUG_TYPE). This is used -/// when a single argument is passed to LDBG() (or LDBG_OS()), if it is an -/// integer we return DEBUG_TYPE and if it is a string we return 1. This fails -/// with a static_assert if we pass an integer and DEBUG_TYPE is not defined. -#define LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE) \ - [](auto LevelOrType) { \ - if constexpr (std::is_integral_v) { \ - constexpr const char *DebugType = LDBG_GET_DEBUG_TYPE_STR(); \ - if constexpr (DebugType[0] == '"') { \ - return ::llvm::impl::strip_quotes(DebugType); \ - } else { \ - MISSING_DEBUG_TYPE(); \ - } \ - } else { \ - return 1; \ - } \ - }(LEVEL_OR_TYPE) +#define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \ + DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, __LINE__) +#define DEBUGLOG_WITH_STREAM_AND_TYPE(STREAM, LEVEL, TYPE) \ + DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, __LLVM_FILE_NAME__) -/// Helpers to get DEBUG_TYPE as a StringRef, even when DEBUG_TYPE is not -/// defined (in which case it expands to "DEBUG_TYPE") -#define LDBG_GET_DEBUG_TYPE_STR__(X) #X -#define LDBG_GET_DEBUG_TYPE_STR_(X) LDBG_GET_DEBUG_TYPE_STR__(X) -#define LDBG_GET_DEBUG_TYPE_STR() LDBG_GET_DEBUG_TYPE_STR_(DEBUG_TYPE) - -/// Helper to call isCurrentDebugType with a StringRef. 
-static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(StringRef Type, - int Level) { - return ::llvm::isCurrentDebugType(Type.str().c_str(), Level); -} -static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(int Level, - StringRef Type) { - return ::llvm::isCurrentDebugType(Type.str().c_str(), Level); -} +namespace impl { /// A raw_ostream that tracks `\n` and print the prefix after each /// newline. @@ -247,7 +99,6 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { std::string Prefix; raw_ostream &Os; bool ShouldPrefixNextString; - bool ShouldEmitNewLineOnDestruction; /// Split the line on newlines and insert the prefix before each /// newline. Forward everything to the underlying stream. @@ -280,17 +131,12 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { public: explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os, - bool ShouldPrefixNextString = true, - bool ShouldEmitNewLineOnDestruction = false) + bool ShouldPrefixNextString = true) : Prefix(std::move(Prefix)), Os(Os), - ShouldPrefixNextString(ShouldPrefixNextString), - ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction) { + ShouldPrefixNextString(ShouldPrefixNextString) { SetUnbuffered(); } - ~raw_ldbg_ostream() final { - if (ShouldEmitNewLineOnDestruction) - Os << '\n'; - } + ~raw_ldbg_ostream() final {} /// Forward the current_pos method to the underlying stream. uint64_t current_pos() const final { return Os.tell(); } @@ -327,17 +173,17 @@ getShortFileName(const char *path) { /// "[DebugType] File:Line " /// Where the File is the file name without the path prefix. 
static LLVM_ATTRIBUTE_UNUSED std::string -computePrefix(StringRef DebugType, const char *File, int Line, int Level) { +computePrefix(const char *DebugType, const char *File, int Line, int Level) { std::string Prefix; raw_string_ostream OsPrefix(Prefix); - if (!DebugType.empty()) + if (DebugType) OsPrefix << "[" << DebugType << ":" << Level << "] "; OsPrefix << File << ":" << Line << " "; return OsPrefix.str(); } /// Overload allowing to swap the order of the DebugType and Level arguments. static LLVM_ATTRIBUTE_UNUSED std::string -computePrefix(int Level, const char *File, int Line, StringRef DebugType) { +computePrefix(int Level, const char *File, int Line, const char *DebugType) { return computePrefix(DebugType, File, Line, Level); } @@ -348,7 +194,6 @@ computePrefix(int Level, const char *File, int Line, StringRef DebugType) { #define LDBG(...) \ for (bool _c = false; _c; _c = false) \ ::llvm::nulls() -#define LDBG_OS(...) #endif } // end namespace llvm diff --git a/llvm/unittests/Support/DebugLogTest.cpp b/llvm/unittests/Support/DebugLogTest.cpp index da3851ed86b35..e087705b72586 100644 --- a/llvm/unittests/Support/DebugLogTest.cpp +++ b/llvm/unittests/Support/DebugLogTest.cpp @@ -27,7 +27,7 @@ TEST(DebugLogTest, Basic) { { std::string str; raw_string_ostream os(str); - LDGB_STREAM_LEVEL_AND_TYPE(os, "", 0) << "NoType"; + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, nullptr) << "NoType"; EXPECT_FALSE(StringRef(os.str()).starts_with('[')); EXPECT_TRUE(StringRef(os.str()).ends_with("NoType\n")); } @@ -36,8 +36,8 @@ TEST(DebugLogTest, Basic) { { std::string str; raw_string_ostream os(str); - LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A"; - LDGB_STREAM_LEVEL_AND_TYPE(os, "B", 0) << "B"; + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A"; + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B"; EXPECT_TRUE(StringRef(os.str()).starts_with('[')); EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), HasSubstr("B\n"))); } @@ -48,18 +48,18 @@ TEST(DebugLogTest, Basic) { raw_string_ostream 
os(str); // Just check that the macro doesn't result in dangling else. if (true) - LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A"; + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A"; else - LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "B"; - LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << "B"; + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "B"; + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B"; EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), Not(HasSubstr("B\n")))); int count = 0; auto inc = [&]() { return ++count; }; EXPECT_THAT(count, Eq(0)); - LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << inc(); + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << inc(); EXPECT_THAT(count, Eq(1)); - LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << inc(); + DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << inc(); EXPECT_THAT(count, Eq(1)); } } @@ -75,7 +75,7 @@ TEST(DebugLogTest, BasicWithLevel) { raw_string_ostream os(str); for (auto type : {"A", "B", "C", "D"}) for (int level : llvm::seq(0, 4)) - LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level) + DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level) << level; EXPECT_EQ(os.str(), "[A:0] A:0 0\n[A:1] A:1 1\n[A:2] A:2 2\n[A:3] A:3 " "3\n[B:0] B:0 0\n[B:1] B:1 1\n[C:0] C:0 0\n"); @@ -92,7 +92,7 @@ TEST(DebugLogTest, NegativeLevel) { raw_string_ostream os(str); for (auto type : {"A", "B"}) for (int level : llvm::seq(0, 2)) - LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level) + DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level) << level; EXPECT_EQ(os.str(), "[A:0] A:0 0\n[B:0] B:0 0\n[B:1] B:1 1\n"); } @@ -128,115 +128,6 @@ TEST(DebugLogTest, DestructorPrefix) { // After destructors, nothing should have been printed. 
EXPECT_EQ(os.str(), ""); } - -TEST(DebugLogTest, LDBG_MACROS) { - llvm::DebugFlag = true; - static const char *DT[] = {"A:3", "B:2"}; - setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0])); - std::string Str; - raw_string_ostream DebugOs(Str); - std::string StrExpected; - raw_string_ostream ExpectedOs(StrExpected); -#undef LDBG_STREAM -#define LDBG_STREAM DebugOs -#define DEBUG_TYPE "A" - LDBG() << "Hello, world!"; - ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - - // Test with a level, no type. - LDBG(2) << "Hello, world!"; - ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - -// Now the type will be explicit, check we don't use DEBUG_TYPE. -#undef DEBUG_TYPE - - // Test with a type - LDBG("B") << "Hello, world!"; - ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - - // Test with a type and a level - LDBG("B", 2) << "Hello, world!"; - ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - - // Test with a type not enabled. - LDBG("C", 1) << "Hello, world!"; - EXPECT_EQ(DebugOs.str(), ""); - - // Test with a level not enabled. 
- LDBG("B", 3) << "Hello, world!"; - EXPECT_EQ(DebugOs.str(), ""); -} - -TEST(DebugLogTest, LDBG_OS_MACROS) { - llvm::DebugFlag = true; - static const char *DT[] = {"A:3", "B:2"}; - setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0])); - std::string Str; - raw_string_ostream DebugOs(Str); - std::string StrExpected; - raw_string_ostream ExpectedOs(StrExpected); -#undef LDBG_STREAM -#define LDBG_STREAM DebugOs -#define DEBUG_TYPE "A" - LDBG_OS([](raw_ostream &Os) { Os << "Hello, world!"; }); - ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - - // Test with a level, no type. - LDBG_OS(2, [](raw_ostream &Os) { Os << "Hello, world!"; }); - ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - -// Now the type will be explicit, check we don't use DEBUG_TYPE. -#undef DEBUG_TYPE - - // Test with a type. - LDBG_OS("B", [](raw_ostream &Os) { Os << "Hello, world!"; }); - ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - - // Test with a type and a level - LDBG_OS("B", 2, [](raw_ostream &Os) { Os << "Hello, world!"; }); - ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) - << " Hello, world!\n"; - EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); - Str.clear(); - StrExpected.clear(); - - // Test with a type not enabled. - LDBG_OS("C", 1, [](raw_ostream &Os) { Os << "Hello, world!"; }); - EXPECT_EQ(DebugOs.str(), ""); - - // Test with a level not enabled. 
- LDBG_OS("B", 3, [](raw_ostream &Os) { Os << "Hello, world!"; }); - EXPECT_EQ(DebugOs.str(), ""); -} - #else TEST(DebugLogTest, Basic) { // LDBG should be compiled out in NDEBUG, so just check it compiles and has diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 132ed815c354e..aba6178a2ea6c 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -1151,7 +1151,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter, std::optional maybeFailure; for (Operation *root : state.getPayloadOps(getRoot())) { WalkResult walkResult = root->walk([&](Operation *op) { - LDBG(DEBUG_TYPE_MATCHER, 1) + LDBG(1, DEBUG_TYPE_MATCHER) << "matching " << OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions()) << " @" << op; @@ -1166,7 +1166,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter, if (diag.isDefiniteFailure()) return WalkResult::interrupt(); if (diag.isSilenceableFailure()) { - LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName() + LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName() << " failed: " << diag.getMessage(); return WalkResult::advance(); } @@ -1298,7 +1298,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter, if (!getRestrictRoot() && op == root) return WalkResult::advance(); - LDBG(DEBUG_TYPE_MATCHER, 1) + LDBG(1, DEBUG_TYPE_MATCHER) << "matching " << OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions()) << " @" << op; @@ -1314,7 +1314,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter, if (diag.isDefiniteFailure()) return WalkResult::interrupt(); if (diag.isSilenceableFailure()) { - LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName() + LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName() << " failed: " << diag.getMessage(); continue; } @@ -2165,10 +2165,10 @@ 
DiagnosedSilenceableFailure transform::MatchOperationEmptyOp::matchOperation( ::std::optional<::mlir::Operation *> maybeCurrent, transform::TransformResults &results, transform::TransformState &state) { if (!maybeCurrent.has_value()) { - LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp success"; + LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp success"; return DiagnosedSilenceableFailure::success(); } - LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp failure"; + LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp failure"; return emitSilenceableError() << "operation is not empty"; } From 54fc5367f63cca8e011d93bbd55764b0a7ecbbd5 Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Fri, 12 Sep 2025 10:34:49 +0000 Subject: [PATCH 111/734] [LV] Fix crash in uncountable exit with side effects checking Fixes an ICE reported on PR #145663, as an assert was found to be reachable with a specific combination of unreachable blocks. --- .../Vectorize/LoopVectorizationLegality.cpp | 11 +++--- .../early_exit_store_legality.ll | 35 +++++++++++++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 85a6fcaf3ecdd..ff35db14f7094 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1903,11 +1903,12 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved( SafetyInfo.computeLoopSafetyInfo(TheLoop); // We need to know that load will be executed before we can hoist a // copy out to run just before the first iteration. - // FIXME: Currently, other restrictions prevent us from reaching this point - // with a loop where the uncountable exit condition is determined - // by a conditional load. 
- assert(SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop) && - "Unhandled control flow in uncountable exit loop with side effects"); + if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) { + reportVectorizationFailure( + "Load for uncountable exit not guaranteed to execute", + "ConditionalUncountableExitLoad", ORE, TheLoop); + return false; + } // Prohibit any potential aliasing with any instruction in the loop which // might store to memory. diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll index 8ae404cf9571f..4226c5d9e650b 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll @@ -602,5 +602,40 @@ exit: ret void } +;; ICE was caused by assert for the load used in the uncountable exit condition +;; being guaranteed to execute. +@e = external addrspace(21) global [4 x i8] +define void @crash_conditional_load_for_uncountable_exit() { +; CHECK-LABEL: LV: Checking a loop in 'crash_conditional_load_for_uncountable_exit' +; CHECK: LV: Not vectorizing: Load for uncountable exit not guaranteed to execute. 
+entry: + br label %cont + +handler.out_of_bounds: + unreachable + +cont: + %h.06 = phi i64 [ 0, %entry ], [ %inc, %a.exit ] + %arrayidx = getelementptr i8, ptr addrspace(21) @e, i64 %h.06 + br i1 false, label %cont1, label %handler.type_mismatch + +handler.type_mismatch: + unreachable + +cont1: + %0 = load i8, ptr addrspace(21) %arrayidx, align 1 + store i16 0, ptr null, align 2 + %cmp.not.i.i = icmp eq i8 %0, 0 + br i1 %cmp.not.i.i, label %a.exit, label %if.then.i.i + +if.then.i.i: + unreachable + +a.exit: + %inc = add i64 %h.06, 1 + br i1 true, label %handler.out_of_bounds, label %cont +} + + declare void @init_mem(ptr, i64); declare i64 @get_an_unknown_offset(); From b936a7cf990391b5e7d10524619c955ce42d5932 Mon Sep 17 00:00:00 2001 From: Karlo Basioli Date: Fri, 12 Sep 2025 11:58:56 +0100 Subject: [PATCH 112/734] Fix bazel build failure (introduced in #157507) (#158268) --- utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel index daf639fbcee31..6a0af96e96831 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel @@ -10,6 +10,7 @@ package(default_visibility = ["//visibility:public"]) srcs = [src], data = [ "Vector/td/unroll-elements.mlir", + "Vector/td/xfer-drop-unit-dims.mlir", "Vector/vector-sink-transform.mlir", "//llvm:llvm-symbolizer", "//mlir:mlir-opt", @@ -35,6 +36,7 @@ package(default_visibility = ["//visibility:public"]) "Transform/*-symbol-decl-and-schedule.mlir", "Transform/include/**/*.mlir", "Vector/td/unroll-elements.mlir", + "Vector/td/xfer-drop-unit-dims.mlir", "Vector/vector-sink-transform.mlir", ], ) From 1e10b782ecac29eff6057889d64519666f0d43b2 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Fri, 12 Sep 2025 12:00:39 +0100 Subject: [PATCH 113/734] 
[AArch64][InstCombine] Canonicalize whilelo intrinsic (#151553) InstCombine llvm.aarch64.sve.whilelo to the generic LLVM llvm.get.active.lane.mask intrinsic --- .../AArch64/AArch64TargetTransformInfo.cpp | 11 ++ .../AArch64/sve-intrinsic-whilelo.ll | 102 ++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b2d9e1e63f207..8c4b4f6e4d6de 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2747,6 +2747,15 @@ static std::optional instCombineDMB(InstCombiner &IC, return std::nullopt; } +static std::optional instCombineWhilelo(InstCombiner &IC, + IntrinsicInst &II) { + return IC.replaceInstUsesWith( + II, + IC.Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, + {II.getType(), II.getOperand(0)->getType()}, + {II.getOperand(0), II.getOperand(1)})); +} + static std::optional instCombinePTrue(InstCombiner &IC, IntrinsicInst &II) { if (match(II.getOperand(0), m_ConstantInt())) @@ -2883,6 +2892,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, return instCombineSVEDupqLane(IC, II); case Intrinsic::aarch64_sve_insr: return instCombineSVEInsr(IC, II); + case Intrinsic::aarch64_sve_whilelo: + return instCombineWhilelo(IC, II); case Intrinsic::aarch64_sve_ptrue: return instCombinePTrue(IC, II); case Intrinsic::aarch64_sve_uxtb: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll new file mode 100644 index 0000000000000..181a41786418f --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | 
FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define @whilelo_nxv2i1.i32(i32 %a, i32 %b) { +; CHECK-LABEL: define @whilelo_nxv2i1.i32( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %a, i32 %b) + ret %mask +} + +define @whilelo_nxv4i1.i32(i32 %a, i32 %b) { +; CHECK-LABEL: define @whilelo_nxv4i1.i32( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %a, i32 %b) + ret %mask +} + +define @whilelo_nxv8i1.i32(i32 %a, i32 %b) { +; CHECK-LABEL: define @whilelo_nxv8i1.i32( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %a, i32 %b) + ret %mask +} + +define @whilelo_nxv16i1.i32(i32 %a, i32 %b) { +; CHECK-LABEL: define @whilelo_nxv16i1.i32( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %a, i32 %b) + ret %mask +} + +define @whilelo_nxv2i1.i64(i64 %a, i64 %b) { +; CHECK-LABEL: define @whilelo_nxv2i1.i64( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b) + ret %mask +} + +define @whilelo_nxv4i1.i64(i64 %a, i64 %b) { +; CHECK-LABEL: define @whilelo_nxv4i1.i64( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = 
call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %a, i64 %b) + ret %mask +} + +define @whilelo_nxv8i1.i64(i64 %a, i64 %b) { +; CHECK-LABEL: define @whilelo_nxv8i1.i64( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %a, i64 %b) + ret %mask +} + +define @whilelo_nxv16i1.i64(i64 %a, i64 %b) { +; CHECK-LABEL: define @whilelo_nxv16i1.i64( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %a, i64 %b) + ret %mask +} + +define @whilelo_nxv16i1.i64_const() { +; CHECK-LABEL: define @whilelo_nxv16i1.i64_const() { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 16) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 16) + ret %mask +} + +define @whilelo_nxv16i1.i32_const() { +; CHECK-LABEL: define @whilelo_nxv16i1.i32_const() { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 16) +; CHECK-NEXT: ret [[MASK]] +; + %mask = tail call @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 0, i32 16) + ret %mask +} From de8859da5485ab06b7331c37f98aa778cb469e6b Mon Sep 17 00:00:00 2001 From: Wenju He Date: Fri, 12 Sep 2025 19:02:30 +0800 Subject: [PATCH 114/734] [libclc] Create LIBCLC_OUTPUT_LIBRARY_DIR directory before build (#158171) This fixes `No such file or directory` error when "Unix Makefiles" generator is used, see https://github.com/intel/llvm/issues/20058. Ninja generator implicitly creates output directory when generating libclc libraries, but "Unix Makefiles" generator does not. 
--- libclc/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index c75f450d8d3ad..7960f3494770e 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -104,6 +104,7 @@ else() # Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR. set( LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_OUTPUT_DIR}/lib/libclc ) + file( MAKE_DIRECTORY ${LIBCLC_OUTPUT_LIBRARY_DIR} ) endif() if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} ) From 2832717ecc84a93519aaf9a6558b8ce4e1c40933 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 12 Sep 2025 12:30:54 +0100 Subject: [PATCH 115/734] Introduce LDBG_OS() macro as a variant of LDBG() (#158277) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also, improve LDBG() to accept debug type and level in any order, and add unit-tests for LDBG() and LGDB_OS(). LDBG_OS() is a macro that behaves like LDBG() but instead of directly using it to stream the output, it takes a callback function that will be called with a raw_ostream. This is a re-land with workarounds for older gcc and clang versions. Previous attempts in #157194 and #158260 Co-authored-by: Andrzej Warzyński --- llvm/include/llvm/Support/Debug.h | 5 - llvm/include/llvm/Support/DebugLog.h | 280 ++++++++++++++---- llvm/unittests/Support/DebugLogTest.cpp | 129 +++++++- .../lib/Dialect/Transform/IR/TransformOps.cpp | 12 +- 4 files changed, 343 insertions(+), 83 deletions(-) diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h index a7795d403721c..b73f2d7c8b852 100644 --- a/llvm/include/llvm/Support/Debug.h +++ b/llvm/include/llvm/Support/Debug.h @@ -44,11 +44,6 @@ class raw_ostream; /// level, return false. LLVM_ABI bool isCurrentDebugType(const char *Type, int Level = 0); -/// Overload allowing to swap the order of the Type and Level arguments. 
-LLVM_ABI inline bool isCurrentDebugType(int Level, const char *Type) { - return isCurrentDebugType(Type, Level); -} - /// setCurrentDebugType - Set the current debug type, as if the -debug-only=X /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h index dce706e196bde..c1e1648c3b546 100644 --- a/llvm/include/llvm/Support/DebugLog.h +++ b/llvm/include/llvm/Support/DebugLog.h @@ -19,52 +19,55 @@ namespace llvm { #ifndef NDEBUG -// LDBG() is a macro that can be used as a raw_ostream for debugging. -// It will stream the output to the dbgs() stream, with a prefix of the -// debug type and the file and line number. A trailing newline is added to the -// output automatically. If the streamed content contains a newline, the prefix -// is added to each beginning of a new line. Nothing is printed if the debug -// output is not enabled or the debug type does not match. -// -// E.g., -// LDBG() << "Bitset contains: " << Bitset; -// is somehow equivalent to -// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" << -// __LINE__ << " " -// << "Bitset contains: " << Bitset << "\n"); -// +/// LDBG() is a macro that can be used as a raw_ostream for debugging. +/// It will stream the output to the dbgs() stream, with a prefix of the +/// debug type and the file and line number. A trailing newline is added to the +/// output automatically. If the streamed content contains a newline, the prefix +/// is added to each beginning of a new line. Nothing is printed if the debug +/// output is not enabled or the debug type does not match. 
+/// +/// E.g., +/// LDBG() << "Bitset contains: " << Bitset; +/// is equivalent to +/// LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] " << __FILE__ << ":" << +/// __LINE__ << " " +/// << "Bitset contains: " << Bitset << "\n"); +/// // An optional `level` argument can be provided to control the verbosity of the -// output. The default level is 1, and is in increasing level of verbosity. -// -// The `level` argument can be a literal integer, or a macro that evaluates to -// an integer. -// -// An optional `type` argument can be provided to control the debug type. The -// default type is DEBUG_TYPE. The `type` argument can be a literal string, or a -// macro that evaluates to a string. +/// output. The default level is 1, and is in increasing level of verbosity. +/// +/// The `level` argument can be a literal integer, or a macro that evaluates to +/// an integer. +/// +/// An optional `type` argument can be provided to control the debug type. The +/// default type is DEBUG_TYPE. The `type` argument can be a literal string, or +/// a macro that evaluates to a string. +/// +/// E.g., +/// LDBG(2) << "Bitset contains: " << Bitset; +/// LDBG("debug_type") << "Bitset contains: " << Bitset; +/// LDBG("debug_type", 2) << "Bitset contains: " << Bitset; #define LDBG(...) _GET_LDBG_MACRO(__VA_ARGS__)(__VA_ARGS__) -// Helper macros to choose the correct macro based on the number of arguments. -#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3 -#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \ - LDBG_FUNC_CHOOSER argsWithParentheses -#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \ - LDBG_FUNC_RECOMPOSER( \ - (__VA_ARGS__, LDBG_LOG_LEVEL_WITH_TYPE, LDBG_LOG_LEVEL, )) -#define LDBG_NO_ARG_EXPANDER() , , LDBG_LOG_LEVEL_1 -#define _GET_LDBG_MACRO(...) 
\ - LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__()) - -// Dispatch macros to support the `level` argument or none (default to 1) -#define LDBG_LOG_LEVEL(LEVEL) \ - DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), LEVEL, DEBUG_TYPE) -#define LDBG_LOG_LEVEL_1() LDBG_LOG_LEVEL(1) -// This macro is a helper when LDBG() is called with 2 arguments. -// In this case we want to allow the order of the arguments to be swapped. -// We rely on the fact that the `level` argument is an integer, and the `type` -// is a string and dispatch to a C++ API that is overloaded. -#define LDBG_LOG_LEVEL_WITH_TYPE(LEVEL_OR_TYPE, TYPE_OR_LEVEL) \ - DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), (LEVEL_OR_TYPE), (TYPE_OR_LEVEL)) +/// LDBG_OS() is a macro that behaves like LDBG() but instead of directly using +/// it to stream the output, it takes a callback function that will be called +/// with a raw_ostream. +/// This is useful when you need to pass a `raw_ostream` to a helper function to +/// be able to print (when the `<<` operator is not available). +/// +/// E.g., +/// LDBG_OS([&] (raw_ostream &Os) { +/// Os << "Pass Manager contains: "; +/// pm.printAsTextual(Os); +/// }); +/// +/// Just like LDBG(), it optionally accepts a `level` and `type` arguments. +/// E.g., +/// LDBG_OS(2, [&] (raw_ostream &Os) { ... }); +/// LDBG_OS("debug_type", [&] (raw_ostream &Os) { ... }); +/// LDBG_OS("debug_type", 2, [&] (raw_ostream &Os) { ... }); +/// +#define LDBG_OS(...) _GET_LDBG_OS_MACRO(__VA_ARGS__)(__VA_ARGS__) // We want the filename without the full path. We are using the __FILE__ macro // and a constexpr function to strip the path prefix. 
We can avoid the frontend @@ -76,22 +79,168 @@ namespace llvm { #define __LLVM_FILE_NAME__ ::llvm::impl::getShortFileName(__FILE__) #endif -#define DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, \ - LINE) \ - for (bool _c = \ - (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \ +// Everything below are implementation details of the macros above. +namespace impl { + +/// This macro expands to the stream to use for output, we use a macro to allow +/// unit-testing to override. +#define LDBG_STREAM ::llvm::dbgs() + +// ---------------------------------------------------------------------------- +// LDBG() implementation +// ---------------------------------------------------------------------------- + +// Helper macros to choose the correct LDBG() macro based on the number of +// arguments. +#define LDBG_FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3 +#define LDBG_FUNC_RECOMPOSER(argsWithParentheses) \ + LDBG_FUNC_CHOOSER argsWithParentheses +#define LDBG_CHOOSE_FROM_ARG_COUNT(...) \ + LDBG_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_TYPE_AND_LEVEL, LDBG_LEVEL_OR_TYPE, )) +#define LDBG_NO_ARG_EXPANDER() , , LDBG_NO_ARG +#define _GET_LDBG_MACRO(...) \ + LDBG_CHOOSE_FROM_ARG_COUNT(LDBG_NO_ARG_EXPANDER __VA_ARGS__()) + +/// This macro is the core of the LDBG() implementation. It is used to print the +/// debug output with the given stream, level, type, file, and line number. 
+#define LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, \ + TYPE_OR_LEVEL, FILE, LINE) \ + for (bool _c = ::llvm::DebugFlag && ::llvm::impl::ldbgIsCurrentDebugType( \ + TYPE_OR_LEVEL, LEVEL_OR_TYPE); \ _c; _c = false) \ - for (::llvm::impl::raw_ldbg_ostream LdbgOS{ \ - ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)}; \ - _c; _c = false) \ - ::llvm::impl::RAIINewLineStream{LdbgOS}.asLvalue() + ::llvm::impl::raw_ldbg_ostream{ \ + ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \ + (STREAM), /*ShouldPrefixNextString=*/true, \ + /*ShouldEmitNewLineOnDestruction=*/true} \ + .asLvalue() -#define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \ - DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(STREAM, LEVEL, TYPE, FILE, __LINE__) -#define DEBUGLOG_WITH_STREAM_AND_TYPE(STREAM, LEVEL, TYPE) \ - DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, __LLVM_FILE_NAME__) +/// These macros are helpers to implement LDBG() with an increasing amount of +/// optional arguments made explicit. +#define LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ + FILE) \ + LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ + FILE, __LINE__) +#define LDGB_STREAM_LEVEL_AND_TYPE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL) \ + LDBG_STREAM_LEVEL_TYPE_AND_FILE(STREAM, LEVEL_OR_TYPE, TYPE_OR_LEVEL, \ + __LLVM_FILE_NAME__) +/// This macro is a helper when LDBG() is called with 2 arguments. +/// In this case we want to force the first argument to be the type for +/// consistency in the codebase. +/// We trick this by casting the first argument to a (const char *) which +/// won't compile with an int. +#define LDBG_TYPE_AND_LEVEL(TYPE, LEVEL) \ + LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, static_cast(TYPE), \ + (LEVEL)) -namespace impl { +/// When a single argument is provided. This can be either a level or the debug +/// type. 
If a level is provided, we default the debug type to DEBUG_TYPE, if a +/// string is provided, we default the level to 1. +#define LDBG_LEVEL_OR_TYPE(LEVEL_OR_TYPE) \ + LDGB_STREAM_LEVEL_AND_TYPE(LDBG_STREAM, (LEVEL_OR_TYPE), \ + LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE)) +#define LDBG_NO_ARG() LDBG_LEVEL_OR_TYPE(1) + +// ---------------------------------------------------------------------------- +// LDBG_OS() implementation +// ---------------------------------------------------------------------------- + +// Helper macros to choose the correct LDBG_OS() macro based on the number of +// arguments. +#define LDBG_OS_FUNC_CHOOSER(_f1, _f2, _f3, _f4, ...) _f4 +#define LDBG_OS_FUNC_RECOMPOSER(argsWithParentheses) \ + LDBG_OS_FUNC_CHOOSER argsWithParentheses +#define LDBG_OS_CHOOSE_FROM_ARG_COUNT(...) \ + LDBG_OS_FUNC_RECOMPOSER((__VA_ARGS__, LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK, \ + LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK, \ + LDBG_OS_CALLBACK, )) +#define LDBG_OS_NO_ARG_EXPANDER() , , , LDBG_OS_CALLBACK +#define _GET_LDBG_OS_MACRO(...) \ + LDBG_OS_CHOOSE_FROM_ARG_COUNT(LDBG_OS_NO_ARG_EXPANDER __VA_ARGS__()) + +/// This macro is the core of the LDBG_OS() macros. It is used to print the +/// debug output with the given stream, level, type, file, and line number. 
+#define LDBG_OS_IMPL(TYPE_OR_LEVEL, LEVEL_OR_TYPE, CALLBACK, STREAM, FILE, \ + LINE) \ + if (::llvm::DebugFlag && \ + ::llvm::impl::ldbgIsCurrentDebugType(TYPE_OR_LEVEL, LEVEL_OR_TYPE)) { \ + ::llvm::impl::raw_ldbg_ostream LdbgOS{ \ + ::llvm::impl::computePrefix(TYPE_OR_LEVEL, FILE, LINE, LEVEL_OR_TYPE), \ + (STREAM), /*ShouldPrefixNextString=*/true, \ + /*ShouldEmitNewLineOnDestruction=*/true}; \ + CALLBACK(LdbgOS); \ + } + +#define LDBG_OS_TYPE_AND_LEVEL_AND_CALLBACK(TYPE, LEVEL, CALLBACK) \ + LDBG_OS_IMPL(static_cast(TYPE), LEVEL, CALLBACK, LDBG_STREAM, \ + __LLVM_FILE_NAME__, __LINE__) +#define LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(LEVEL_OR_TYPE, CALLBACK) \ + LDBG_OS_IMPL(LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE), LEVEL_OR_TYPE, \ + CALLBACK, LDBG_STREAM, __LLVM_FILE_NAME__, __LINE__) +#define LDBG_OS_CALLBACK(CALLBACK) \ + LDBG_OS_LEVEL_OR_TYPE_AND_CALLBACK(1, CALLBACK) + +// ---------------------------------------------------------------------------- +// General Helpers for the implementation above +// ---------------------------------------------------------------------------- + +/// Return the stringified macro as a StringRef. +/// Also, strip out potential surrounding quotes: this comes from an artifact of +/// the macro stringification, if DEBUG_TYPE is undefined we get the string +/// "DEBUG_TYPE", however if it is defined we get the string with the quotes. +/// For example if DEBUG_TYPE is "foo", we get "\"foo\"" but we want to return +/// "foo" here. +constexpr ::llvm::StringRef strip_quotes(const char *Str) { + ::llvm::StringRef S(Str); + if (Str[0] == '"' && Str[S.size() - 1] == '"') + return StringRef(Str + 1, S.size() - 2); + return S; +} + +/// Fail compilation if DEBUG_TYPE is not defined. +/// This is a workaround for GCC <=12 and clang <=16 which do not support +/// static_assert in templated constexpr functions. 
+#if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 12) || \ + (defined(__clang__) && __clang_major__ <= 16) +#define MISSING_DEBUG_TYPE() \ + extern void missing_DEBUG_TYPE(void); \ + missing_DEBUG_TYPE(); +#else +#define MISSING_DEBUG_TYPE() static_assert(false, "DEBUG_TYPE is not defined"); +#endif + +/// Helper to provide the default level (=1) or type (=DEBUG_TYPE). This is used +/// when a single argument is passed to LDBG() (or LDBG_OS()), if it is an +/// integer we return DEBUG_TYPE and if it is a string we return 1. This fails +/// with a static_assert if we pass an integer and DEBUG_TYPE is not defined. +#define LDBG_GET_DEFAULT_TYPE_OR_LEVEL(LEVEL_OR_TYPE) \ + [](auto LevelOrType) { \ + if constexpr (std::is_integral_v) { \ + constexpr const char *DebugType = LDBG_GET_DEBUG_TYPE_STR(); \ + if constexpr (DebugType[0] == '"') { \ + return ::llvm::impl::strip_quotes(DebugType); \ + } else { \ + MISSING_DEBUG_TYPE(); \ + } \ + } else { \ + return 1; \ + } \ + }(LEVEL_OR_TYPE) + +/// Helpers to get DEBUG_TYPE as a StringRef, even when DEBUG_TYPE is not +/// defined (in which case it expands to "DEBUG_TYPE") +#define LDBG_GET_DEBUG_TYPE_STR__(X) #X +#define LDBG_GET_DEBUG_TYPE_STR_(X) LDBG_GET_DEBUG_TYPE_STR__(X) +#define LDBG_GET_DEBUG_TYPE_STR() LDBG_GET_DEBUG_TYPE_STR_(DEBUG_TYPE) + +/// Helper to call isCurrentDebugType with a StringRef. +static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(StringRef Type, + int Level) { + return ::llvm::isCurrentDebugType(Type.str().c_str(), Level); +} +static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(int Level, + StringRef Type) { + return ::llvm::isCurrentDebugType(Type.str().c_str(), Level); +} /// A raw_ostream that tracks `\n` and print the prefix after each /// newline. 
@@ -99,6 +248,7 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { std::string Prefix; raw_ostream &Os; bool ShouldPrefixNextString; + bool ShouldEmitNewLineOnDestruction; /// Split the line on newlines and insert the prefix before each /// newline. Forward everything to the underlying stream. @@ -131,12 +281,17 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { public: explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os, - bool ShouldPrefixNextString = true) + bool ShouldPrefixNextString = true, + bool ShouldEmitNewLineOnDestruction = false) : Prefix(std::move(Prefix)), Os(Os), - ShouldPrefixNextString(ShouldPrefixNextString) { + ShouldPrefixNextString(ShouldPrefixNextString), + ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction) { SetUnbuffered(); } - ~raw_ldbg_ostream() final {} + ~raw_ldbg_ostream() final { + if (ShouldEmitNewLineOnDestruction) + Os << '\n'; + } /// Forward the current_pos method to the underlying stream. uint64_t current_pos() const final { return Os.tell(); } @@ -173,17 +328,17 @@ getShortFileName(const char *path) { /// "[DebugType] File:Line " /// Where the File is the file name without the path prefix. static LLVM_ATTRIBUTE_UNUSED std::string -computePrefix(const char *DebugType, const char *File, int Line, int Level) { +computePrefix(StringRef DebugType, const char *File, int Line, int Level) { std::string Prefix; raw_string_ostream OsPrefix(Prefix); - if (DebugType) + if (!DebugType.empty()) OsPrefix << "[" << DebugType << ":" << Level << "] "; OsPrefix << File << ":" << Line << " "; return OsPrefix.str(); } /// Overload allowing to swap the order of the DebugType and Level arguments. 
static LLVM_ATTRIBUTE_UNUSED std::string -computePrefix(int Level, const char *File, int Line, const char *DebugType) { +computePrefix(int Level, const char *File, int Line, StringRef DebugType) { return computePrefix(DebugType, File, Line, Level); } @@ -194,6 +349,7 @@ computePrefix(int Level, const char *File, int Line, const char *DebugType) { #define LDBG(...) \ for (bool _c = false; _c; _c = false) \ ::llvm::nulls() +#define LDBG_OS(...) #endif } // end namespace llvm diff --git a/llvm/unittests/Support/DebugLogTest.cpp b/llvm/unittests/Support/DebugLogTest.cpp index e087705b72586..da3851ed86b35 100644 --- a/llvm/unittests/Support/DebugLogTest.cpp +++ b/llvm/unittests/Support/DebugLogTest.cpp @@ -27,7 +27,7 @@ TEST(DebugLogTest, Basic) { { std::string str; raw_string_ostream os(str); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, nullptr) << "NoType"; + LDGB_STREAM_LEVEL_AND_TYPE(os, "", 0) << "NoType"; EXPECT_FALSE(StringRef(os.str()).starts_with('[')); EXPECT_TRUE(StringRef(os.str()).ends_with("NoType\n")); } @@ -36,8 +36,8 @@ TEST(DebugLogTest, Basic) { { std::string str; raw_string_ostream os(str); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A"; - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A"; + LDGB_STREAM_LEVEL_AND_TYPE(os, "B", 0) << "B"; EXPECT_TRUE(StringRef(os.str()).starts_with('[')); EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), HasSubstr("B\n"))); } @@ -48,18 +48,18 @@ TEST(DebugLogTest, Basic) { raw_string_ostream os(str); // Just check that the macro doesn't result in dangling else. 
if (true) - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "A"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "A"; else - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << "B"; - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << "B"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << "B"; + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << "B"; EXPECT_THAT(os.str(), AllOf(HasSubstr("A\n"), Not(HasSubstr("B\n")))); int count = 0; auto inc = [&]() { return ++count; }; EXPECT_THAT(count, Eq(0)); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "A") << inc(); + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "A") << inc(); EXPECT_THAT(count, Eq(1)); - DEBUGLOG_WITH_STREAM_AND_TYPE(os, 0, "B") << inc(); + LDGB_STREAM_LEVEL_AND_TYPE(os, 0, "B") << inc(); EXPECT_THAT(count, Eq(1)); } } @@ -75,7 +75,7 @@ TEST(DebugLogTest, BasicWithLevel) { raw_string_ostream os(str); for (auto type : {"A", "B", "C", "D"}) for (int level : llvm::seq(0, 4)) - DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level) + LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level) << level; EXPECT_EQ(os.str(), "[A:0] A:0 0\n[A:1] A:1 1\n[A:2] A:2 2\n[A:3] A:3 " "3\n[B:0] B:0 0\n[B:1] B:1 1\n[C:0] C:0 0\n"); @@ -92,7 +92,7 @@ TEST(DebugLogTest, NegativeLevel) { raw_string_ostream os(str); for (auto type : {"A", "B"}) for (int level : llvm::seq(0, 2)) - DEBUGLOG_WITH_STREAM_TYPE_FILE_AND_LINE(os, level, type, type, level) + LDBG_STREAM_LEVEL_TYPE_FILE_AND_LINE(os, level, type, type, level) << level; EXPECT_EQ(os.str(), "[A:0] A:0 0\n[B:0] B:0 0\n[B:1] B:1 1\n"); } @@ -128,6 +128,115 @@ TEST(DebugLogTest, DestructorPrefix) { // After destructors, nothing should have been printed. 
EXPECT_EQ(os.str(), ""); } + +TEST(DebugLogTest, LDBG_MACROS) { + llvm::DebugFlag = true; + static const char *DT[] = {"A:3", "B:2"}; + setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0])); + std::string Str; + raw_string_ostream DebugOs(Str); + std::string StrExpected; + raw_string_ostream ExpectedOs(StrExpected); +#undef LDBG_STREAM +#define LDBG_STREAM DebugOs +#define DEBUG_TYPE "A" + LDBG() << "Hello, world!"; + ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a level, no type. + LDBG(2) << "Hello, world!"; + ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + +// Now the type will be explicit, check we don't use DEBUG_TYPE. +#undef DEBUG_TYPE + + // Test with a type + LDBG("B") << "Hello, world!"; + ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type and a level + LDBG("B", 2) << "Hello, world!"; + ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type not enabled. + LDBG("C", 1) << "Hello, world!"; + EXPECT_EQ(DebugOs.str(), ""); + + // Test with a level not enabled. 
+ LDBG("B", 3) << "Hello, world!"; + EXPECT_EQ(DebugOs.str(), ""); +} + +TEST(DebugLogTest, LDBG_OS_MACROS) { + llvm::DebugFlag = true; + static const char *DT[] = {"A:3", "B:2"}; + setCurrentDebugTypes(DT, sizeof(DT) / sizeof(DT[0])); + std::string Str; + raw_string_ostream DebugOs(Str); + std::string StrExpected; + raw_string_ostream ExpectedOs(StrExpected); +#undef LDBG_STREAM +#define LDBG_STREAM DebugOs +#define DEBUG_TYPE "A" + LDBG_OS([](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[A:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a level, no type. + LDBG_OS(2, [](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[A:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + +// Now the type will be explicit, check we don't use DEBUG_TYPE. +#undef DEBUG_TYPE + + // Test with a type. + LDBG_OS("B", [](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[B:1] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type and a level + LDBG_OS("B", 2, [](raw_ostream &Os) { Os << "Hello, world!"; }); + ExpectedOs << "[B:2] " << __LLVM_FILE_NAME__ << ":" << (__LINE__ - 1) + << " Hello, world!\n"; + EXPECT_EQ(DebugOs.str(), ExpectedOs.str()); + Str.clear(); + StrExpected.clear(); + + // Test with a type not enabled. + LDBG_OS("C", 1, [](raw_ostream &Os) { Os << "Hello, world!"; }); + EXPECT_EQ(DebugOs.str(), ""); + + // Test with a level not enabled. 
+ LDBG_OS("B", 3, [](raw_ostream &Os) { Os << "Hello, world!"; }); + EXPECT_EQ(DebugOs.str(), ""); +} + #else TEST(DebugLogTest, Basic) { // LDBG should be compiled out in NDEBUG, so just check it compiles and has diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index aba6178a2ea6c..132ed815c354e 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -1151,7 +1151,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter, std::optional maybeFailure; for (Operation *root : state.getPayloadOps(getRoot())) { WalkResult walkResult = root->walk([&](Operation *op) { - LDBG(1, DEBUG_TYPE_MATCHER) + LDBG(DEBUG_TYPE_MATCHER, 1) << "matching " << OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions()) << " @" << op; @@ -1166,7 +1166,7 @@ transform::CollectMatchingOp::apply(transform::TransformRewriter &rewriter, if (diag.isDefiniteFailure()) return WalkResult::interrupt(); if (diag.isSilenceableFailure()) { - LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName() + LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName() << " failed: " << diag.getMessage(); return WalkResult::advance(); } @@ -1298,7 +1298,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter, if (!getRestrictRoot() && op == root) return WalkResult::advance(); - LDBG(1, DEBUG_TYPE_MATCHER) + LDBG(DEBUG_TYPE_MATCHER, 1) << "matching " << OpWithFlags(op, OpPrintingFlags().assumeVerified().skipRegions()) << " @" << op; @@ -1314,7 +1314,7 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter, if (diag.isDefiniteFailure()) return WalkResult::interrupt(); if (diag.isSilenceableFailure()) { - LDBG(1, DEBUG_TYPE_MATCHER) << "matcher " << matcher.getName() + LDBG(DEBUG_TYPE_MATCHER, 1) << "matcher " << matcher.getName() << " failed: " << diag.getMessage(); continue; } @@ -2165,10 +2165,10 @@ 
DiagnosedSilenceableFailure transform::MatchOperationEmptyOp::matchOperation( ::std::optional<::mlir::Operation *> maybeCurrent, transform::TransformResults &results, transform::TransformState &state) { if (!maybeCurrent.has_value()) { - LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp success"; + LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp success"; return DiagnosedSilenceableFailure::success(); } - LDBG(1, DEBUG_TYPE_MATCHER) << "MatchOperationEmptyOp failure"; + LDBG(DEBUG_TYPE_MATCHER, 1) << "MatchOperationEmptyOp failure"; return emitSilenceableError() << "operation is not empty"; } From a2a9601ea49afff950f9267b378b30ef799d6ad9 Mon Sep 17 00:00:00 2001 From: Gaurav Verma <48321602+xintin@users.noreply.github.com> Date: Fri, 12 Sep 2025 04:45:56 -0700 Subject: [PATCH 116/734] [mlir][AMDGPU] Updated `PermlaneSwapOp` to select correct val (#157586) * as per the instruction description, updated `PermlaneSwapOp` to select correct val * updated corresponding lit tests Issue it resolves: the block reduction was failing otherwise as we were selecting the `{0}` always. 
--------- Signed-off-by: xintin --- .../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 11 ++- .../Conversion/AMDGPUToROCDL/permlane.mlir | 97 ++++++++++++++----- .../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 14 ++- 3 files changed, 91 insertions(+), 31 deletions(-) mode change 100644 => 100755 mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir mode change 100644 => 100755 mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 203790ed95153..0078eed8b7a67 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -1915,7 +1915,16 @@ struct AMDGPUPermlaneLowering : public ConvertOpToLLVMPattern { else llvm_unreachable("unsupported row length"); - Value vdstNew = LLVM::ExtractValueOp::create(rewriter, loc, res, {0}); + const Value vdst0 = LLVM::ExtractValueOp::create(rewriter, loc, res, {0}); + const Value vdst1 = LLVM::ExtractValueOp::create(rewriter, loc, res, {1}); + + const Value isEqual = + rewriter.create<LLVM::ICmpOp>(loc, LLVM::ICmpPredicate::eq, vdst0, v); + + // Per `permlane(16|32)` semantics: if the first extracted element equals + // 'v', the result is the second element; otherwise it is the first.
+ Value vdstNew = + rewriter.create<LLVM::SelectOp>(loc, isEqual, vdst1, vdst0); permuted.emplace_back(vdstNew); } diff --git a/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir b/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir old mode 100644 new mode 100755 index aae2b1d0fd90c..a92321da8f357 --- a/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/permlane.mlir @@ -4,8 +4,11 @@ // CHECK-SAME: (%[[ARG0:.*]]: i32) func.func @test_permlane16_i32(%arg0 : i32) -> i32 { // CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[ARG0]], %[[ARG0]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> -// CHECK: return %[[RES]] : i32 +// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ARG0]] : i32 +// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32 +// CHECK: return %[[SEL]] : i32 %0 = amdgpu.permlane_swap %arg0 16 : i32 return %0 : i32 } @@ -14,8 +17,11 @@ func.func @test_permlane16_i32(%arg0 : i32) -> i32 { // CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[ARG0]], %[[ARG0]], true, true : (i32, i32) -> <(i32, i32)> -// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> -// CHECK: return %[[RES]] : i32 +// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ARG0]] : i32 +// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32 +// CHECK: return %[[SEL]] : i32 %0 = amdgpu.permlane_swap %arg0 16 { fetch_inactive = true, bound_ctrl = true } : i32 return %0 : i32 } @@ -24,8 +30,11 @@ func.func
@test_permlane16_i32_optional_attr(%arg0 : i32) -> i32 { // CHECK-SAME: (%[[ARG0:.*]]: i32) func.func @test_permlane32_i32(%arg0 : i32) -> i32 { // CHECK: %[[PERM:.*]] = rocdl.permlane32.swap %[[ARG0]], %[[ARG0]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> -// CHECK: return %[[RES]] : i32 +// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ARG0]] : i32 +// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32 +// CHECK: return %[[SEL]] : i32 %0 = amdgpu.permlane_swap %arg0 32 : i32 return %0 : i32 } @@ -35,8 +44,11 @@ func.func @test_permlane32_i32(%arg0 : i32) -> i32 { func.func @test_permlane16_f32(%arg0 : f32) -> f32 { // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f32 to i32 // CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[CAST]], %[[CAST]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> -// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[RES]] : i32 to f32 +// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[CAST]] : i32 +// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32 +// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[SEL]] : i32 to f32 // CHECK: return %[[RES_CAST]] : f32 %0 = amdgpu.permlane_swap %arg0 16 : f32 return %0 : f32 @@ -47,8 +59,11 @@ func.func @test_permlane16_f32(%arg0 : f32) -> f32 { func.func @test_permlane32_f32(%arg0 : f32) -> f32 { // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f32 to i32 // CHECK: %[[PERM:.*]] = rocdl.permlane32.swap %[[CAST]], %[[CAST]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[RES:.*]] = 
llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> -// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[RES]] : i32 to f32 +// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[CAST]] : i32 +// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32 +// CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[SEL]] : i32 to f32 // CHECK: return %[[RES_CAST]] : f32 %0 = amdgpu.permlane_swap %arg0 32 : f32 return %0 : f32 @@ -60,8 +75,11 @@ func.func @test_permlane16_f16(%arg0 : f16) -> f16 { // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f16 to i16 // CHECK: %[[ZEXT:.*]] = llvm.zext %[[CAST]] : i16 to i32 // CHECK: %[[PERM:.*]] = rocdl.permlane16.swap %[[ZEXT]], %[[ZEXT]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> -// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[RES]] : i32 to i16 +// CHECK: %[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ZEXT]] : i32 +// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32 +// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SEL]] : i32 to i16 // CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[TRUNC]] : i16 to f16 // CHECK: return %[[RES_CAST]] : f16 %0 = amdgpu.permlane_swap %arg0 16 : f16 @@ -74,8 +92,11 @@ func.func @test_permlane32_f16(%arg0 : f16) -> f16 { // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG0]] : f16 to i16 // CHECK: %[[ZEXT:.*]] = llvm.zext %[[CAST]] : i16 to i32 // CHECK: %[[PERM:.*]] = rocdl.permlane32.swap %[[ZEXT]], %[[ZEXT]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[RES:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> -// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[RES]] : i32 to i16 +// CHECK: 
%[[E0:.*]] = llvm.extractvalue %[[PERM]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[PERM]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP:.*]] = llvm.icmp "eq" %[[E0]], %[[ZEXT]] : i32 +// CHECK: %[[SEL:.*]] = llvm.select %[[CMP]], %[[E1]], %[[E0]] : i1, i32 +// CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SEL]] : i32 to i16 // CHECK: %[[RES_CAST:.*]] = llvm.bitcast %[[TRUNC]] : i16 to f16 // CHECK: return %[[RES_CAST]] : f16 %0 = amdgpu.permlane_swap %arg0 32 : f16 @@ -90,10 +111,16 @@ func.func @test_permlane16_2xi32(%arg0 : vector<2xi32>) -> vector<2xi32> { // CHECK-DAG: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[ARG0]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[ARG0]][%[[C1]] : i32] : vector<2xi32> -// CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)> -// CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[T0:.*]] = rocdl.permlane16.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)> +// CHECK: %[[T0_0:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[T0_1:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[T0_0]], %[[ELEM0]] : i32 +// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[T0_1]], %[[T0_0]] : i1, i32 +// CHECK: %[[T1:.*]] = rocdl.permlane16.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)> +// CHECK: %[[T1_0:.*]] = llvm.extractvalue %[[T1]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[T1_1:.*]] = llvm.extractvalue %[[T1]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" 
%[[T1_0]], %[[ELEM1]] : i32 +// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[T1_1]], %[[T1_0]] : i1, i32 // CHECK: %[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32> // CHECK: return %[[VEC_INSERT1]] : vector<2xi32> @@ -109,10 +136,16 @@ func.func @test_permlane32_2xi32(%arg0 : vector<2xi32>) -> vector<2xi32> { // CHECK-DAG: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[ARG0]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[ARG0]][%[[C1]] : i32] : vector<2xi32> -// CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)> -// CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[T0:.*]] = rocdl.permlane32.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)> +// CHECK: %[[T0_0:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[T0_1:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[T0_0]], %[[ELEM0]] : i32 +// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[T0_1]], %[[T0_0]] : i1, i32 +// CHECK: %[[T1:.*]] = rocdl.permlane32.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)> +// CHECK: %[[T1_0:.*]] = llvm.extractvalue %[[T1]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[T1_1:.*]] = llvm.extractvalue %[[T1]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" %[[T1_0]], %[[ELEM1]] : i32 +// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[T1_1]], %[[T1_0]] : i1, i32 // CHECK: 
%[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32> // CHECK: return %[[VEC_INSERT1]] : vector<2xi32> @@ -130,9 +163,15 @@ func.func @test_permlane16_4xf16(%arg0 : vector<4xf16>) -> vector<4xf16> { // CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[CAST1]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[CAST1]][%[[C1]] : i32] : vector<2xi32> // CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM0_E0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM0_E1:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[PERM0_E0]], %[[ELEM0]] : i32 +// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[PERM0_E1]], %[[PERM0_E0]] : i1, i32 // CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane16.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM1_E0:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM1_E1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" %[[PERM1_E0]], %[[ELEM1]] : i32 +// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[PERM1_E1]], %[[PERM1_E0]] : i1, i32 // CHECK: %[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32> // CHECK: %[[CAST2:.*]] = llvm.bitcast %[[VEC_INSERT1]] : vector<2xi32> to vector<4xf16> @@ -151,9 +190,15 @@ func.func 
@test_permlane32_4xf16(%arg0 : vector<4xf16>) -> vector<4xf16> { // CHECK: %[[ELEM0:.*]] = llvm.extractelement %[[CAST1]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[ELEM1:.*]] = llvm.extractelement %[[CAST1]][%[[C1]] : i32] : vector<2xi32> // CHECK: %[[PERM0_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM0]], %[[ELEM0]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM0_E0:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM0_E1:.*]] = llvm.extractvalue %[[PERM0_TUPLE]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP0:.*]] = llvm.icmp "eq" %[[PERM0_E0]], %[[ELEM0]] : i32 +// CHECK: %[[PERM0:.*]] = llvm.select %[[CMP0]], %[[PERM0_E1]], %[[PERM0_E0]] : i1, i32 // CHECK: %[[PERM1_TUPLE:.*]] = rocdl.permlane32.swap %[[ELEM1]], %[[ELEM1]], false, false : (i32, i32) -> <(i32, i32)> -// CHECK: %[[PERM1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM1_E0:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][0] : !llvm.struct<(i32, i32)> +// CHECK: %[[PERM1_E1:.*]] = llvm.extractvalue %[[PERM1_TUPLE]][1] : !llvm.struct<(i32, i32)> +// CHECK: %[[CMP1:.*]] = llvm.icmp "eq" %[[PERM1_E0]], %[[ELEM1]] : i32 +// CHECK: %[[PERM1:.*]] = llvm.select %[[CMP1]], %[[PERM1_E1]], %[[PERM1_E0]] : i1, i32 // CHECK: %[[VEC_INSERT0:.*]] = llvm.insertelement %[[PERM0]], %[[POISON]][%[[C0]] : i32] : vector<2xi32> // CHECK: %[[VEC_INSERT1:.*]] = llvm.insertelement %[[PERM1]], %[[VEC_INSERT0]][%[[C1]] : i32] : vector<2xi32> // CHECK: %[[CAST2:.*]] = llvm.bitcast %[[VEC_INSERT1]] : vector<2xi32> to vector<4xf16> diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir old mode 100644 new mode 100755 index c6261b37ef8f2..ef631ce8a12e5 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -749,13 +749,19 @@ 
gpu.module @test_module { %shfl1, %pred1 = gpu.shuffle xor %arg0, %arg1, %arg4 : f32 // CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32 // CHECK: %[[#PERMUTE:]] = rocdl.permlane16.swap %[[#CAST_VALUE]], %[[#CAST_VALUE]], false, false : (i32, i32) -> <(i32, i32)> - // CHECK: %[[#EXTRACT:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)> - // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#EXTRACT]] : i32 to f32 + // CHECK: %[[#EXTRACT0:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)> + // CHECK: %[[#EXTRACT1:]] = llvm.extractvalue %[[#PERMUTE:]][1] : !llvm.struct<(i32, i32)> + // CHECK: %[[#CMP:]] = llvm.icmp "eq" %[[#EXTRACT0]], %[[#CAST_VALUE]] : i32 + // CHECK: %[[#SEL:]] = llvm.select %[[#CMP]], %[[#EXTRACT1]], %[[#EXTRACT0]] : i1, i32 + // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#SEL]] : i32 to f32 %shfl2, %pred2 = gpu.shuffle xor %arg0, %arg2, %arg4 : f32 // CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32 // CHECK: %[[#PERMUTE:]] = rocdl.permlane32.swap %[[#CAST_VALUE]], %[[#CAST_VALUE]], false, false : (i32, i32) -> <(i32, i32)> - // CHECK: %[[#EXTRACT:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)> - // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#EXTRACT]] : i32 to f32 + // CHECK: %[[#EXTRACT0:]] = llvm.extractvalue %[[#PERMUTE:]][0] : !llvm.struct<(i32, i32)> + // CHECK: %[[#EXTRACT1:]] = llvm.extractvalue %[[#PERMUTE:]][1] : !llvm.struct<(i32, i32)> + // CHECK: %[[#CMP:]] = llvm.icmp "eq" %[[#EXTRACT0]], %[[#CAST_VALUE]] : i32 + // CHECK: %[[#SEL:]] = llvm.select %[[#CMP]], %[[#EXTRACT1]], %[[#EXTRACT0]] : i1, i32 + // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#SEL]] : i32 to f32 %shfl3, %pred3 = gpu.shuffle xor %arg0, %arg3, %arg4 : f32 func.return %shfl1, %shfl2, %shfl3 : f32, f32, f32 } From 93a1470a97ec2c57247824ff2a59437831fea6de Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 12 Sep 2025 13:45:36 +0100 Subject: [PATCH 117/734] 
[InstCombine] Remove redundant alignment assumptions. (#123348) Use known bits to remove redundant alignment assumptions. Libc++ now adds alignment assumptions for std::vector::begin() and std::vector::end(), so I expect we will see quite a bit more assumptions in C++ [1]. Try to clean up some redundant ones to start with. [1] https://github.com/llvm/llvm-project/pull/108961 PR: https://github.com/llvm/llvm-project/pull/123348 --- .../InstCombine/InstCombineCalls.cpp | 35 ++++++++++++++++--- llvm/test/Analysis/BasicAA/featuretest.ll | 26 +++++--------- .../Transforms/InstCombine/assume-align.ll | 15 ++++++-- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 33b66aeaffe60..11bac7bdb6eb2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3385,12 +3385,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // TODO: apply range metadata for range check patterns? } - // Separate storage assumptions apply to the underlying allocations, not any - // particular pointer within them. When evaluating the hints for AA purposes - // we getUnderlyingObject them; by precomputing the answers here we can - // avoid having to do so repeatedly there. for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { OperandBundleUse OBU = II->getOperandBundleAt(Idx); + + // Separate storage assumptions apply to the underlying allocations, not + // any particular pointer within them. When evaluating the hints for AA + // purposes we getUnderlyingObject them; by precomputing the answers here + // we can avoid having to do so repeatedly there. 
if (OBU.getTagName() == "separate_storage") { assert(OBU.Inputs.size() == 2); auto MaybeSimplifyHint = [&](const Use &U) { @@ -3404,6 +3405,32 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { MaybeSimplifyHint(OBU.Inputs[0]); MaybeSimplifyHint(OBU.Inputs[1]); } + + // Try to remove redundant alignment assumptions. + if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) { + RetainedKnowledge RK = getKnowledgeFromOperandInAssume( + *cast<AssumeInst>(II), II->arg_size() + Idx); + if (!RK || RK.AttrKind != Attribute::Alignment || + !isPowerOf2_64(RK.ArgValue) || !isa<ConstantInt>(RK.IRArgValue)) + continue; + + // Don't try to remove align assumptions for pointers derived from + // arguments. We might lose information if the function gets inline and + // the align argument attribute disappears. + Value *UO = getUnderlyingObject(RK.WasOn); + if (!UO || isa<Argument>(UO)) + continue; + + // Compute known bits for the pointer, passing nullptr as context to + // avoid computeKnownBits using the assumption we are about to remove + // for reasoning. + KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr); + unsigned TZ = std::min(Known.countMinTrailingZeros(), + Value::MaxAlignmentExponent); + if ((1ULL << TZ) < RK.ArgValue) + continue; + return CallBase::removeOperandBundle(II, OBU.getTagID()); + } } // Convert nonnull assume like: diff --git a/llvm/test/Analysis/BasicAA/featuretest.ll b/llvm/test/Analysis/BasicAA/featuretest.ll index e4cb009f0c633..04c4725d26c1d 100644 --- a/llvm/test/Analysis/BasicAA/featuretest.ll +++ b/llvm/test/Analysis/BasicAA/featuretest.ll @@ -15,24 +15,14 @@ declare void @llvm.assume(i1) ; operations on another array. Important for scientific codes.
; define i32 @different_array_test(i64 %A, i64 %B) { -; NO_ASSUME-LABEL: @different_array_test( -; NO_ASSUME-NEXT: [[ARRAY11:%.*]] = alloca [100 x i32], align 4 -; NO_ASSUME-NEXT: [[ARRAY22:%.*]] = alloca [200 x i32], align 4 -; NO_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAY11]], i32 4) ] -; NO_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY11]]) -; NO_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY22]]) -; NO_ASSUME-NEXT: [[POINTER2:%.*]] = getelementptr i32, ptr [[ARRAY22]], i64 [[B:%.*]] -; NO_ASSUME-NEXT: store i32 7, ptr [[POINTER2]], align 4 -; NO_ASSUME-NEXT: ret i32 0 -; -; USE_ASSUME-LABEL: @different_array_test( -; USE_ASSUME-NEXT: [[ARRAY11:%.*]] = alloca [100 x i32], align 4 -; USE_ASSUME-NEXT: [[ARRAY22:%.*]] = alloca [200 x i32], align 4 -; USE_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY11]]) -; USE_ASSUME-NEXT: call void @external(ptr nonnull [[ARRAY22]]) -; USE_ASSUME-NEXT: [[POINTER2:%.*]] = getelementptr i32, ptr [[ARRAY22]], i64 [[B:%.*]] -; USE_ASSUME-NEXT: store i32 7, ptr [[POINTER2]], align 4 -; USE_ASSUME-NEXT: ret i32 0 +; CHECK-LABEL: @different_array_test( +; CHECK-NEXT: [[ARRAY11:%.*]] = alloca [100 x i32], align 4 +; CHECK-NEXT: [[ARRAY22:%.*]] = alloca [200 x i32], align 4 +; CHECK-NEXT: call void @external(ptr nonnull [[ARRAY11]]) +; CHECK-NEXT: call void @external(ptr nonnull [[ARRAY22]]) +; CHECK-NEXT: [[POINTER2:%.*]] = getelementptr i32, ptr [[ARRAY22]], i64 [[B:%.*]] +; CHECK-NEXT: store i32 7, ptr [[POINTER2]], align 4 +; CHECK-NEXT: ret i32 0 ; %Array1 = alloca i32, i32 100 %Array2 = alloca i32, i32 200 diff --git a/llvm/test/Transforms/InstCombine/assume-align.ll b/llvm/test/Transforms/InstCombine/assume-align.ll index f0e0257433086..274632658496b 100644 --- a/llvm/test/Transforms/InstCombine/assume-align.ll +++ b/llvm/test/Transforms/InstCombine/assume-align.ll @@ -175,7 +175,6 @@ define ptr @dont_fold_assume_align_zero_of_loaded_pointer_into_align_metadata(pt define ptr 
@redundant_assume_align_1(ptr %p) { ; CHECK-LABEL: @redundant_assume_align_1( ; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i32 1) ] ; CHECK-NEXT: call void @foo(ptr [[P2]]) ; CHECK-NEXT: ret ptr [[P2]] ; @@ -189,7 +188,6 @@ define ptr @redundant_assume_align_1(ptr %p) { define ptr @redundant_assume_align_8_via_align_metadata(ptr %p) { ; CHECK-LABEL: @redundant_assume_align_8_via_align_metadata( ; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align [[META0:![0-9]+]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i32 8) ] ; CHECK-NEXT: call void @foo(ptr [[P2]]) ; CHECK-NEXT: ret ptr [[P2]] ; @@ -250,6 +248,19 @@ define ptr @redundant_assume_align_8_via_asume(ptr %p) { } declare void @foo(ptr) + +; !align must have a constant integer alignment. +define ptr @assume_load_pointer_result(ptr %p, i64 %align) { +; CHECK-LABEL: @assume_load_pointer_result( +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 [[ALIGN:%.*]]) ] +; CHECK-NEXT: ret ptr [[P2]] +; + %p2 = load ptr, ptr %p + call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 %align) ] + ret ptr %p2 +} + ;. ; CHECK: [[META0]] = !{i64 8} ;. From 45f6c5015892cc0361645319833fffcfe2dafd2f Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Fri, 12 Sep 2025 15:52:24 +0300 Subject: [PATCH 118/734] [TableGen][DecoderEmitter] Decode operands with "all zeros" encoding (#158163) Follow-up to #156358. The original change didn't take into account operands with "all zeros" encoding, now fixed. 
--- .../FixedLenDecoderEmitter/InitValue.td | 46 ------------- .../FixedLenDecoderEmitter/operand-decoder.td | 66 +++++++++++++++++++ llvm/utils/TableGen/DecoderEmitter.cpp | 47 +++++++------ 3 files changed, 88 insertions(+), 71 deletions(-) delete mode 100644 llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td create mode 100644 llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td deleted file mode 100644 index 03847439ffc2e..0000000000000 --- a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: llvm-tblgen -gen-disassembler -I %p/../../../include %s | FileCheck %s - -include "llvm/Target/Target.td" - -def archInstrInfo : InstrInfo { } - -def arch : Target { - let InstructionSet = archInstrInfo; -} - -let OutOperandList = (outs), Size = 2 in { - -def foo : Instruction { - let InOperandList = (ins i32imm:$factor); - field bits<16> Inst; - field bits<16> SoftFail = 0; - bits<8> factor; - let factor{0} = 0; // zero initial value - let Inst{15...8} = factor{7...0}; - } - -def bar : Instruction { - let InOperandList = (ins i32imm:$factor); - field bits<16> Inst; - field bits<16> SoftFail = 0; - bits<8> factor; - let factor{0} = 1; // non-zero initial value - let Inst{15...8} = factor{7...0}; - } - -def bax : Instruction { - let InOperandList = (ins i32imm:$factor); - field bits<16> Inst; - field bits<16> SoftFail = 0; - bits<33> factor; - let factor{32} = 1; // non-zero initial value - let Inst{15...8} = factor{32...25}; - } - -} - -// CHECK: tmp = fieldFromInstruction(insn, 9, 7) << 1; -// CHECK: tmp = 0x1; -// CHECK: insertBits(tmp, fieldFromInstruction(insn, 9, 7), 1, 7); -// CHECK: tmp = 0x100000000; -// CHECK: insertBits(tmp, fieldFromInstruction(insn, 8, 7), 25, 7); diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td 
b/llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td new file mode 100644 index 0000000000000..f281996cf9a86 --- /dev/null +++ b/llvm/test/TableGen/FixedLenDecoderEmitter/operand-decoder.td @@ -0,0 +1,66 @@ +// RUN: llvm-tblgen -gen-disassembler -I %p/../../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +def R0 : Register<"r0">; +def RC : RegisterClass<"MyTarget", [i32], 32, (add R0)>; + +def MyInstrInfo : InstrInfo; + +def MyTarget : Target { + let InstructionSet = MyInstrInfo; +} + +// CHECK-LABEL: case 0: +// CHECK-NEXT: if (!Check(S, DecodeRCRegisterClass(MI, Decoder))) +// CHECK-NEXT: return MCDisassembler::Fail; +// CHECK-NEXT: tmp = fieldFromInstruction(insn, 2, 4); +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: tmp = 0x0; +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 0, 2), 0, 2); +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 6, 2), 2, 2); +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: tmp = 0x0; +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: tmp = fieldFromInstruction(insn, 13, 2) << 1; +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: tmp = 0x0; +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 17, 1), 1, 1); +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 19, 1), 3, 1); +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: tmp = 0x5; +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: tmp = 0x2; +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 26, 2), 2, 2); +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: tmp = 0xa; +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 28, 1), 0, 1); +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 30, 1), 2, 1); +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: return S; + +def I : Instruction { + let OutOperandList = (outs 
RC:$op0); + let InOperandList = (ins i32imm:$op1, i32imm:$op2, i32imm:$op3, i32imm:$op4, + i32imm:$op5, i32imm:$op6, i32imm:$op7, i32imm:$op8); + let Size = 4; + bits<32> Inst; + bits<0> op0; // no init, no variable parts + bits<4> op1; // no init, 1 variable part + bits<4> op2; // no init, 2 variable parts + bits<4> op3 = 0b0000; // zero init, no variable parts + bits<4> op4 = {0, ?, ?, 0}; // zero init, 1 variable part + bits<4> op5 = {?, 0, ?, 0}; // zero init, 2 variable parts + bits<4> op6 = 0b0101; // non-zero init, no variable parts + bits<4> op7 = {?, ?, 1, 0}; // non-zero init, 1 variable part + bits<4> op8 = {1, ?, 1, ?}; // non-zero init, 2 variable parts + let Inst{5...2} = op1; + let Inst{1...0} = op2{1...0}; + let Inst{7...6} = op2{3...2}; + let Inst{11...8} = op3; + let Inst{15...12} = op4; + let Inst{19...16} = op5; + let Inst{23...20} = op6; + let Inst{27...24} = op7; + let Inst{31...28} = op8; +} diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 8747d02ac892b..a8a9036a1a7f4 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/KnownBits.h" @@ -169,8 +170,6 @@ struct OperandInfo { Fields.emplace_back(Base, Width, Offset); } - unsigned numFields() const { return Fields.size(); } - ArrayRef fields() const { return Fields; } }; @@ -1104,31 +1103,29 @@ void DecoderTableBuilder::emitBinaryParser(raw_ostream &OS, indent Indent, return; } - if (OpInfo.Fields.empty() && OpInfo.InitValue && IgnoreFullyDefinedOperands) - return; - - // We need to construct the encoding of the operand from pieces if it is not - // encoded sequentially or has a non-zero constant part in the encoding. 
- bool UseInsertBits = OpInfo.numFields() > 1 || OpInfo.InitValue.value_or(0); - - if (UseInsertBits) { - OS << Indent << "tmp = 0x"; - OS.write_hex(OpInfo.InitValue.value_or(0)); - OS << ";\n"; - } - - for (const auto &[Base, Width, Offset] : OpInfo.fields()) { - OS << Indent; - if (UseInsertBits) - OS << "insertBits(tmp, "; - else - OS << "tmp = "; - OS << "fieldFromInstruction(insn, " << Base << ", " << Width << ')'; - if (UseInsertBits) - OS << ", " << Offset << ", " << Width << ')'; - else if (Offset != 0) + if (OpInfo.fields().empty()) { + // Only a constant part. The old behavior is to not decode this operand. + if (IgnoreFullyDefinedOperands) + return; + // Initialize `tmp` with the constant part. + OS << Indent << "tmp = " << format_hex(*OpInfo.InitValue, 0) << ";\n"; + } else if (OpInfo.fields().size() == 1 && !OpInfo.InitValue.value_or(0)) { + // One variable part and no/zero constant part. Initialize `tmp` with the + // variable part. + auto [Base, Width, Offset] = OpInfo.fields().front(); + OS << Indent << "tmp = fieldFromInstruction(insn, " << Base << ", " << Width + << ')'; + if (Offset) OS << " << " << Offset; OS << ";\n"; + } else { + // General case. Initialize `tmp` with the constant part, if any, and + // insert the variable parts into it. + OS << Indent << "tmp = " << format_hex(OpInfo.InitValue.value_or(0), 0) + << ";\n"; + for (auto [Base, Width, Offset] : OpInfo.fields()) + OS << Indent << "insertBits(tmp, fieldFromInstruction(insn, " << Base + << ", " << Width << "), " << Offset << ", " << Width << ");\n"; } StringRef Decoder = OpInfo.Decoder; From 61664b61032edf8763f48099636bc7cd35ef622e Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Fri, 12 Sep 2025 15:08:13 +0200 Subject: [PATCH 119/734] [clang] fix clang_cmake_builddir (#155844) When building llvm from a subdirectory (like clspv does) `CMAKE_BINARY_DIR` is at the top of the build directory. 
When building runtimes (libclc for example), the build fails looking for clang (through `find_package` looking at `LLVM_BINARY_DIR` with `NO_DEFAULT_PATH` & `NO_CMAKE_FIND_ROOT_PATH`) because clang is not in `LLVM_BINARY_DIR`. Fix that issue by setting `clang_cmake_builddir` the same way we set `llvm_cmake_builddir` from `LLVM_BINARY_DIR`. For default llvm build (using llvm as the main cmake project), it should not change anything. --- clang/cmake/modules/CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/cmake/modules/CMakeLists.txt b/clang/cmake/modules/CMakeLists.txt index d2d68121371bf..90fbd88ca9826 100644 --- a/clang/cmake/modules/CMakeLists.txt +++ b/clang/cmake/modules/CMakeLists.txt @@ -8,15 +8,14 @@ include(FindPrefixFromConfig) # the usual CMake convention seems to be ${Project}Targets.cmake. set(CLANG_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/clang" CACHE STRING "Path for CMake subdirectory for Clang (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/clang')") -# CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below. -set(clang_cmake_builddir "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/clang") # Keep this in sync with llvm/cmake/CMakeLists.txt! set(LLVM_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/llvm" CACHE STRING "Path for CMake subdirectory for LLVM (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/llvm')") # CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below. -string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_cmake_builddir "${LLVM_LIBRARY_DIR}") -set(llvm_cmake_builddir "${llvm_cmake_builddir}/cmake/llvm") +string(REPLACE "${CMAKE_CFG_INTDIR}" "." 
llvm_builddir "${LLVM_LIBRARY_DIR}") +set(llvm_cmake_builddir "${llvm_builddir}/cmake/llvm") +set(clang_cmake_builddir "${llvm_builddir}/cmake/clang") get_property(CLANG_EXPORTS GLOBAL PROPERTY CLANG_EXPORTS) export(TARGETS ${CLANG_EXPORTS} FILE ${clang_cmake_builddir}/ClangTargets.cmake) From 5149e51cb25d6a68365ca3bd9300cff1b18213e2 Mon Sep 17 00:00:00 2001 From: Jean-Didier PAILLEUX Date: Fri, 12 Sep 2025 15:29:21 +0200 Subject: [PATCH 120/734] [flang][Lower] Add lowering to SYNC ALL, SYNC MEMORY and SYNC IMAGES to PRIF (#154166) In relation to the approval and merge of the https://github.com/llvm/llvm-project/pull/76088 specification about multi-image features in Flang. Here is a PR on adding support for SYNC ALL, SYNC MEMORY and SYNC IMAGES in conformance with the PRIF specification. --------- Co-authored-by: Katherine Rasmussen --- flang/include/flang/Lower/AbstractConverter.h | 3 + .../flang/Optimizer/Builder/IntrinsicCall.h | 9 -- .../flang/Optimizer/Builder/Runtime/Coarray.h | 10 ++ flang/lib/Lower/Bridge.cpp | 10 ++ flang/lib/Lower/Runtime.cpp | 97 +++++++++++++++++-- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 12 +-- .../lib/Optimizer/Builder/Runtime/Coarray.cpp | 61 ++++++++++++ flang/test/Lower/Coarray/sync_all.f90 | 37 +++++++ flang/test/Lower/Coarray/sync_images.f90 | 62 ++++++++++++ flang/test/Lower/Coarray/sync_memory.f90 | 37 +++++++ 10 files changed, 317 insertions(+), 21 deletions(-) create mode 100644 flang/test/Lower/Coarray/sync_all.f90 create mode 100644 flang/test/Lower/Coarray/sync_images.f90 create mode 100644 flang/test/Lower/Coarray/sync_memory.f90 diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 8e9de418e1b7e..0ffe27ea038e8 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -271,6 +271,9 @@ class AbstractConverter { virtual const Fortran::lower::pft::FunctionLikeUnit * getCurrentFunctionUnit() const = 0; + 
/// Check support of Multi-image features if -fcoarray is provided + virtual void checkCoarrayEnabled() = 0; + //===--------------------------------------------------------------------===// // Types //===--------------------------------------------------------------------===// diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 3c020abd59417..d80ee9e861321 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -573,15 +573,6 @@ struct IntrinsicLibrary { void setResultMustBeFreed() { resultMustBeFreed = true; } - // Check support of coarray features - void checkCoarrayEnabled() { - if (converter && - !converter->getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::Coarray)) - fir::emitFatalError(loc, "Coarrays disabled, use '-fcoarray' to enable.", - false); - } - fir::FirOpBuilder &builder; mlir::Location loc; bool resultMustBeFreed = false; diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h index 10ed503a485a3..20bfb7c124af2 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h @@ -71,5 +71,15 @@ void genCoMin(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A, void genCoSum(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A, mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg); +/// Generate call to runtime subroutine prif_sync_all +void genSyncAllStatement(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value stat, mlir::Value errmsg); +/// Generate call to runtime subroutine prif_sync_memory +void genSyncMemoryStatement(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value stat, mlir::Value errmsg); +/// Generate call to runtime subroutine prif_sync_images +void 
genSyncImagesStatement(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value imageSet, mlir::Value stat, + mlir::Value errmsg); } // namespace fir::runtime #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 6125ea9153662..4a5b9885bb7c4 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1131,6 +1131,16 @@ class FirConverter : public Fortran::lower::AbstractConverter { return currentFunctionUnit; } + void checkCoarrayEnabled() override final { + if (!getFoldingContext().languageFeatures().IsEnabled( + Fortran::common::LanguageFeature::Coarray)) + fir::emitFatalError( + getCurrentLocation(), + "Not yet implemented: Multi-image features are experimental and are " + "disabled by default, use '-fcoarray' to enable.", + false); + } + void registerTypeInfo(mlir::Location loc, Fortran::lower::SymbolRef typeInfoSym, const Fortran::semantics::DerivedTypeSpec &typeSpec, diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp index 494dd49e961b0..b19ca0182b4b5 100644 --- a/flang/lib/Lower/Runtime.cpp +++ b/flang/lib/Lower/Runtime.cpp @@ -12,6 +12,7 @@ #include "flang/Lower/OpenMP.h" #include "flang/Lower/StatementContext.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Runtime/Coarray.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" @@ -47,6 +48,42 @@ static void genUnreachable(fir::FirOpBuilder &builder, mlir::Location loc) { builder.setInsertionPointToStart(newBlock); } +/// Initializes values for STAT and ERRMSG +static std::pair getStatAndErrmsg( + Fortran::lower::AbstractConverter &converter, mlir::Location loc, + const std::list &statOrErrList) { + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + + mlir::Value errMsgExpr, statExpr; + for (const 
Fortran::parser::StatOrErrmsg &statOrErr : statOrErrList) { + std::visit(Fortran::common::visitors{ + [&](const Fortran::parser::StatVariable &statVar) { + statExpr = fir::getBase(converter.genExprAddr( + loc, Fortran::semantics::GetExpr(statVar), stmtCtx)); + }, + [&](const Fortran::parser::MsgVariable &errMsgVar) { + const Fortran::semantics::SomeExpr *expr = + Fortran::semantics::GetExpr(errMsgVar); + errMsgExpr = fir::getBase( + converter.genExprBox(loc, *expr, stmtCtx)); + }}, + statOrErr.u); + } + + if (!statExpr) { + statExpr = fir::AbsentOp::create(builder, loc, + builder.getRefType(builder.getI32Type())); + } + if (!errMsgExpr) { + errMsgExpr = fir::AbsentOp::create( + builder, loc, + fir::BoxType::get(fir::CharacterType::get( + builder.getContext(), 1, fir::CharacterType::unknownLen()))); + } + return {statExpr, errMsgExpr}; +} + //===----------------------------------------------------------------------===// // Misc. Fortran statements that lower to runtime calls //===----------------------------------------------------------------------===// @@ -169,20 +206,68 @@ void Fortran::lower::genUnlockStatement( void Fortran::lower::genSyncAllStatement( Fortran::lower::AbstractConverter &converter, - const Fortran::parser::SyncAllStmt &) { - TODO(converter.getCurrentLocation(), "coarray: SYNC ALL runtime"); + const Fortran::parser::SyncAllStmt &stmt) { + mlir::Location loc = converter.getCurrentLocation(); + converter.checkCoarrayEnabled(); + + // Handle STAT and ERRMSG values + const std::list &statOrErrList = stmt.v; + auto [statAddr, errMsgAddr] = getStatAndErrmsg(converter, loc, statOrErrList); + + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + fir::runtime::genSyncAllStatement(builder, loc, statAddr, errMsgAddr); } void Fortran::lower::genSyncImagesStatement( Fortran::lower::AbstractConverter &converter, - const Fortran::parser::SyncImagesStmt &) { - TODO(converter.getCurrentLocation(), "coarray: SYNC IMAGES runtime"); + const 
Fortran::parser::SyncImagesStmt &stmt) { + mlir::Location loc = converter.getCurrentLocation(); + converter.checkCoarrayEnabled(); + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + + // Handle STAT and ERRMSG values + const std::list &statOrErrList = + std::get>(stmt.t); + auto [statAddr, errMsgAddr] = getStatAndErrmsg(converter, loc, statOrErrList); + + // SYNC_IMAGES(*) is passed as count == -1 while SYNC IMAGES([]) has count + // == 0. Note further that SYNC IMAGES(*) is not semantically equivalent to + // SYNC ALL. + Fortran::lower::StatementContext stmtCtx; + mlir::Value imageSet; + const Fortran::parser::SyncImagesStmt::ImageSet &imgSet = + std::get(stmt.t); + std::visit(Fortran::common::visitors{ + [&](const Fortran::parser::IntExpr &intExpr) { + const SomeExpr *expr = Fortran::semantics::GetExpr(intExpr); + imageSet = + fir::getBase(converter.genExprBox(loc, *expr, stmtCtx)); + }, + [&](const Fortran::parser::Star &) { + imageSet = fir::AbsentOp::create( + builder, loc, + fir::BoxType::get(fir::SequenceType::get( + {fir::SequenceType::getUnknownExtent()}, + builder.getI32Type()))); + }}, + imgSet.u); + + fir::runtime::genSyncImagesStatement(builder, loc, imageSet, statAddr, + errMsgAddr); } void Fortran::lower::genSyncMemoryStatement( Fortran::lower::AbstractConverter &converter, - const Fortran::parser::SyncMemoryStmt &) { - TODO(converter.getCurrentLocation(), "coarray: SYNC MEMORY runtime"); + const Fortran::parser::SyncMemoryStmt &stmt) { + mlir::Location loc = converter.getCurrentLocation(); + converter.checkCoarrayEnabled(); + + // Handle STAT and ERRMSG values + const std::list &statOrErrList = stmt.v; + auto [statAddr, errMsgAddr] = getStatAndErrmsg(converter, loc, statOrErrList); + + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + fir::runtime::genSyncMemoryStatement(builder, loc, statAddr, errMsgAddr); } void Fortran::lower::genSyncTeamStatement( diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp 
b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 6ae48c1d5d88b..aa12dbff5935b 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -3716,7 +3716,7 @@ mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType, // CO_BROADCAST void IntrinsicLibrary::genCoBroadcast(llvm::ArrayRef args) { - checkCoarrayEnabled(); + converter->checkCoarrayEnabled(); assert(args.size() == 4); mlir::Value sourceImage = fir::getBase(args[1]); mlir::Value status = @@ -3735,7 +3735,7 @@ void IntrinsicLibrary::genCoBroadcast(llvm::ArrayRef args) { // CO_MAX void IntrinsicLibrary::genCoMax(llvm::ArrayRef args) { - checkCoarrayEnabled(); + converter->checkCoarrayEnabled(); assert(args.size() == 4); mlir::Value refNone = fir::AbsentOp::create(builder, loc, @@ -3755,7 +3755,7 @@ void IntrinsicLibrary::genCoMax(llvm::ArrayRef args) { // CO_MIN void IntrinsicLibrary::genCoMin(llvm::ArrayRef args) { - checkCoarrayEnabled(); + converter->checkCoarrayEnabled(); assert(args.size() == 4); mlir::Value refNone = fir::AbsentOp::create(builder, loc, @@ -3775,7 +3775,7 @@ void IntrinsicLibrary::genCoMin(llvm::ArrayRef args) { // CO_SUM void IntrinsicLibrary::genCoSum(llvm::ArrayRef args) { - checkCoarrayEnabled(); + converter->checkCoarrayEnabled(); assert(args.size() == 4); mlir::Value absentInt = fir::AbsentOp::create(builder, loc, @@ -7438,7 +7438,7 @@ IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef args) { fir::ExtendedValue IntrinsicLibrary::genNumImages(mlir::Type resultType, llvm::ArrayRef args) { - checkCoarrayEnabled(); + converter->checkCoarrayEnabled(); assert(args.size() == 0 || args.size() == 1); if (args.size()) @@ -8519,7 +8519,7 @@ mlir::Value IntrinsicLibrary::genThisGrid(mlir::Type resultType, fir::ExtendedValue IntrinsicLibrary::genThisImage(mlir::Type resultType, llvm::ArrayRef args) { - checkCoarrayEnabled(); + converter->checkCoarrayEnabled(); assert(args.size() >= 1 && args.size() <= 3); const bool 
coarrayIsAbsent = args.size() == 1; mlir::Value team = diff --git a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp index 9a893d61122ac..364e7b753c6ee 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp @@ -165,3 +165,64 @@ void fir::runtime::genCoSum(fir::FirOpBuilder &builder, mlir::Location loc, genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg, PRIFNAME_SUB("co_sum")); } + +/// Generate call to runtime subroutine prif_sync_all +void fir::runtime::genSyncAllStatement(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value stat, + mlir::Value errmsg) { + mlir::FunctionType ftype = + PRIF_FUNCTYPE(PRIF_STAT_TYPE, PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, PRIFNAME_SUB("sync_all"), ftype); + + auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg); + llvm::SmallVector args = fir::runtime::createArguments( + builder, loc, ftype, stat, errmsgArg, errmsgAllocArg); + fir::CallOp::create(builder, loc, funcOp, args); +} + +/// Generate call to runtime subroutine prif_sync_memory +void fir::runtime::genSyncMemoryStatement(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value stat, + mlir::Value errmsg) { + mlir::FunctionType ftype = + PRIF_FUNCTYPE(PRIF_STAT_TYPE, PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, PRIFNAME_SUB("sync_memory"), ftype); + + auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg); + llvm::SmallVector args = fir::runtime::createArguments( + builder, loc, ftype, stat, errmsgArg, errmsgAllocArg); + fir::CallOp::create(builder, loc, funcOp, args); +} + +/// Generate call to runtime subroutine prif_sync_images +void fir::runtime::genSyncImagesStatement(fir::FirOpBuilder &builder, + mlir::Location loc, + mlir::Value imageSet, + mlir::Value stat, + mlir::Value errmsg) { + 
mlir::Type imgSetTy = fir::BoxType::get(fir::SequenceType::get( + {fir::SequenceType::getUnknownExtent()}, builder.getI32Type())); + mlir::FunctionType ftype = PRIF_FUNCTYPE(imgSetTy, PRIF_STAT_TYPE, + PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, PRIFNAME_SUB("sync_images"), ftype); + + // If imageSet is scalar, PRIF require to pass an array of size 1. + if (auto boxTy = mlir::dyn_cast(imageSet.getType())) { + if (!mlir::isa(boxTy.getEleTy())) { + mlir::Value one = + builder.createIntegerConstant(loc, builder.getI32Type(), 1); + mlir::Value shape = fir::ShapeOp::create(builder, loc, one); + imageSet = fir::ReboxOp::create( + builder, loc, + fir::BoxType::get(fir::SequenceType::get({1}, builder.getI32Type())), + imageSet, shape, mlir::Value{}); + } + } + auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg); + llvm::SmallVector args = fir::runtime::createArguments( + builder, loc, ftype, imageSet, stat, errmsgArg, errmsgAllocArg); + fir::CallOp::create(builder, loc, funcOp, args); +} diff --git a/flang/test/Lower/Coarray/sync_all.f90 b/flang/test/Lower/Coarray/sync_all.f90 new file mode 100644 index 0000000000000..c2c12d8cdf237 --- /dev/null +++ b/flang/test/Lower/Coarray/sync_all.f90 @@ -0,0 +1,37 @@ +! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=COARRAY +! RUN: not %flang_fc1 -emit-hlfir %s 2>&1 | FileCheck %s --check-prefixes=NOCOARRAY + +program test_sync_all + implicit none + ! NOCOARRAY: Not yet implemented: Multi-image features are experimental and are disabled by default, use '-fcoarray' to enable. + + ! COARRAY: %[[ERRMSG:.*]]:2 = hlfir.declare %[[VAL_1:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) + ! COARRAY: %[[STAT:.*]]:2 = hlfir.declare %[[VAL_2:.*]] {uniq_name = "_QFEsync_status"} : (!fir.ref) -> (!fir.ref, !fir.ref) + integer sync_status + character(len=128) :: error_message + + ! 
COARRAY: %[[VAL_3:.*]] = fir.absent !fir.ref + ! COARRAY: %[[VAL_4:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_5:.*]] = fir.absent !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync all + + ! COARRAY: %[[VAL_6:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_7:.*]] = fir.absent !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[STAT]]#0, %[[VAL_6]], %[[VAL_7]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync all(stat=sync_status) + + ! COARRAY: %[[VAL_8:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref>) -> !fir.box> + ! COARRAY: %[[VAL_9:.*]] = fir.absent !fir.ref + ! COARRAY: %[[VAL_10:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_11:.*]] = fir.convert %[[VAL_8]] : (!fir.box>) -> !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[VAL_9]], %[[VAL_11]], %[[VAL_10]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync all( errmsg=error_message) + + ! COARRAY: %[[VAL_12:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref>) -> !fir.box> + ! COARRAY: %[[VAL_13:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (!fir.box>) -> !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_all(%[[STAT]]#0, %[[VAL_14]], %[[VAL_13]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync all(stat=sync_status, errmsg=error_message) + +end program test_sync_all diff --git a/flang/test/Lower/Coarray/sync_images.f90 b/flang/test/Lower/Coarray/sync_images.f90 new file mode 100644 index 0000000000000..0224bf235c36c --- /dev/null +++ b/flang/test/Lower/Coarray/sync_images.f90 @@ -0,0 +1,62 @@ +! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=COARRAY +! RUN: not %flang_fc1 -emit-hlfir %s 2>&1 | FileCheck %s --check-prefixes=NOCOARRAY + +program test_sync_images + implicit none + ! NOCOARRAY: Not yet implemented: Multi-image features are experimental and are disabled by default, use '-fcoarray' to enable. + + ! 
COARRAY: %[[ERRMSG:.*]]:2 = hlfir.declare %[[VAL_1:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) + ! COARRAY: %[[ME:.*]]:2 = hlfir.declare %[[VAL_3:.*]] {uniq_name = "_QFEme"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! COARRAY: %[[STAT:.*]]:2 = hlfir.declare %[[VAL_2:.*]] {uniq_name = "_QFEsync_status"} : (!fir.ref) -> (!fir.ref, !fir.ref) + integer sync_status, me + character(len=128) :: error_message + + ! COARRAY: %[[VAL_1:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref>) -> !fir.box> + ! COARRAY: %[[VAL_2:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_3:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_4:.*]] = fir.convert %[[VAL_1]] : (!fir.box>) -> !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_2]], %[[STAT]]#0, %[[VAL_4]], %[[VAL_3]]) fastmath : (!fir.box>, !fir.ref, !fir.box>, !fir.box>) -> () + sync images(*, stat=sync_status, errmsg=error_message) + + ! COARRAY: %[[VAL_5:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref>) -> !fir.box> + ! COARRAY: %[[VAL_6:.*]] = fir.embox %[[ME]]#0 : (!fir.ref) -> !fir.box + ! COARRAY: %[[VAL_7:.*]] = fir.rebox %[[VAL_6]](%[[SHAPE:.*]]) : (!fir.box, !fir.shape<1>) -> !fir.box> + ! COARRAY: %[[VAL_8:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_9:.*]] = fir.convert %[[VAL_7]] : (!fir.box>) -> !fir.box> + ! COARRAY: %[[VAL_10:.*]] = fir.convert %[[VAL_5]] : (!fir.box>) -> !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_9]], %[[STAT]]#0, %[[VAL_10]], %[[VAL_8]]) fastmath : (!fir.box>, !fir.ref, !fir.box>, !fir.box>) -> () + sync images(me, stat=sync_status, errmsg=error_message) + + ! COARRAY: %[[VAL_11:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref>) -> !fir.box> + ! COARRAY: %[[VAL_12:.*]] = fir.embox %[[IMG_SET:.*]]#0(%[[SHAPE_1:.*]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + ! COARRAY: %[[VAL_13:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (!fir.box>) -> !fir.box> + ! 
COARRAY: %[[VAL_15:.*]] = fir.convert %[[VAL_11]] : (!fir.box>) -> !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_14]], %[[STAT]]#0, %[[VAL_15]], %[[VAL_13]]) fastmath : (!fir.box>, !fir.ref, !fir.box>, !fir.box>) -> () + sync images([1], stat=sync_status, errmsg=error_message) + + ! COARRAY: %[[VAL_17:.*]] = fir.absent !fir.ref + ! COARRAY: %[[VAL_18:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_19:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_20:.*]] = fir.absent !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_19]], %[[VAL_17]], %[[VAL_18]], %[[VAL_20]]) fastmath : (!fir.box>, !fir.ref, !fir.box>, !fir.box>) -> () + sync images(*) + + ! COARRAY: %[[VAL_23:.*]] = fir.absent !fir.ref + ! COARRAY: %[[VAL_24:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_21:.*]] = fir.embox %[[ME]]#0 : (!fir.ref) -> !fir.box + ! COARRAY: %[[VAL_22:.*]] = fir.rebox %[[VAL_21]](%[[SHAPE_2:.*]]) : (!fir.box, !fir.shape<1>) -> !fir.box> + ! COARRAY: %[[VAL_25:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_26:.*]] = fir.convert %[[VAL_22]] : (!fir.box>) -> !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_26]], %[[VAL_23]], %[[VAL_24]], %[[VAL_25]]) fastmath : (!fir.box>, !fir.ref, !fir.box>, !fir.box>) -> () + sync images(me) + + ! COARRAY: %[[VAL_28:.*]] = fir.absent !fir.ref + ! COARRAY: %[[VAL_29:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_27:.*]] = fir.embox %[[IMG_SET:.*]]#0(%[[SHAPE_3:.*]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + ! COARRAY: %[[VAL_30:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (!fir.box>) -> !fir.box> + ! 
COARRAY: fir.call @_QMprifPprif_sync_images(%[[VAL_31]], %[[VAL_28]], %[[VAL_29]], %[[VAL_30]]) fastmath : (!fir.box>, !fir.ref, !fir.box>, !fir.box>) -> () + sync images([1]) + +end program test_sync_images diff --git a/flang/test/Lower/Coarray/sync_memory.f90 b/flang/test/Lower/Coarray/sync_memory.f90 new file mode 100644 index 0000000000000..773cb6fe4efb7 --- /dev/null +++ b/flang/test/Lower/Coarray/sync_memory.f90 @@ -0,0 +1,37 @@ +! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=COARRAY +! RUN: not %flang_fc1 -emit-hlfir %s 2>&1 | FileCheck %s --check-prefixes=NOCOARRAY + +program test_sync_memory + implicit none + ! NOCOARRAY: Not yet implemented: Multi-image features are experimental and are disabled by default, use '-fcoarray' to enable. + + ! COARRAY: %[[ERRMSG:.*]]:2 = hlfir.declare %[[VAL_1:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) + ! COARRAY: %[[STAT:.*]]:2 = hlfir.declare %[[VAL_2:.*]] {uniq_name = "_QFEsync_status"} : (!fir.ref) -> (!fir.ref, !fir.ref) + integer sync_status + character(len=128) :: error_message + + ! COARRAY: %[[VAL_3:.*]] = fir.absent !fir.ref + ! COARRAY: %[[VAL_4:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_5:.*]] = fir.absent !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_memory(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync memory + + ! COARRAY: %[[VAL_6:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_7:.*]] = fir.absent !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_memory(%[[STAT]]#0, %[[VAL_6]], %[[VAL_7]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync memory(stat=sync_status) + + ! COARRAY: %[[VAL_8:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref>) -> !fir.box> + ! COARRAY: %[[VAL_9:.*]] = fir.absent !fir.ref + ! COARRAY: %[[VAL_10:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_11:.*]] = fir.convert %[[VAL_8]] : (!fir.box>) -> !fir.box> + ! 
COARRAY: fir.call @_QMprifPprif_sync_memory(%[[VAL_9]], %[[VAL_11]], %[[VAL_10]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync memory( errmsg=error_message) + + ! COARRAY: %[[VAL_12:.*]] = fir.embox %[[ERRMSG]]#0 : (!fir.ref>) -> !fir.box> + ! COARRAY: %[[VAL_13:.*]] = fir.absent !fir.box> + ! COARRAY: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (!fir.box>) -> !fir.box> + ! COARRAY: fir.call @_QMprifPprif_sync_memory(%[[STAT]]#0, %[[VAL_14]], %[[VAL_13]]) fastmath : (!fir.ref, !fir.box>, !fir.box>) -> () + sync memory(stat=sync_status, errmsg=error_message) + +end program test_sync_memory From 03e3ce82b926a4c138e6e0bacfcd1d5572c3e380 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 12 Sep 2025 14:32:42 +0100 Subject: [PATCH 121/734] [mlir][Transforms] Fix crash in `reconcile-unrealized-casts` (#158067) The `reconcile-unrealized-casts` pass used to crash when the input contains circular chains of `unrealized_conversion_cast` ops. Furthermore, the `reconcileUnrealizedCasts` helper functions used to erase ops that were not passed via the `castOps` operand. Such ops are now preserved. That's why some integration tests had to be changed. Also avoid copying the set of all unresolved materializations in `convertOperations`. This commit is in preparation of turning `RewriterBase::replaceOp` into a non-virtual function. 
--------- Co-authored-by: Mehdi Amini --- .../mlir/Transforms/DialectConversion.h | 3 + .../Transforms/Utils/DialectConversion.cpp | 151 +++++++++++++----- .../reconcile-unrealized-casts.mlir | 50 ++++++ ...assume-alignment-runtime-verification.mlir | 3 +- .../atomic-rmw-runtime-verification.mlir | 3 +- .../MemRef/store-runtime-verification.mlir | 3 +- 6 files changed, 171 insertions(+), 42 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index a096f82a4cfd8..f8caae3ce9995 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -1428,6 +1428,9 @@ struct ConversionConfig { /// /// In the above example, %0 can be used instead of %3 and all cast ops are /// folded away. +void reconcileUnrealizedCasts( + const DenseSet &castOps, + SmallVectorImpl *remainingCastOps = nullptr); void reconcileUnrealizedCasts( ArrayRef castOps, SmallVectorImpl *remainingCastOps = nullptr); diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index df9700f11200f..d53e1e78f2027 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -3100,6 +3100,7 @@ unsigned OperationLegalizer::applyCostModelToPatterns( //===----------------------------------------------------------------------===// // OperationConverter //===----------------------------------------------------------------------===// + namespace { enum OpConversionMode { /// In this mode, the conversion will ignore failed conversions to allow @@ -3117,6 +3118,13 @@ enum OpConversionMode { } // namespace namespace mlir { + +// Predeclaration only. +static void reconcileUnrealizedCasts( + const DenseMap + &castOps, + SmallVectorImpl *remainingCastOps); + // This class converts operations to a given conversion target via a set of // rewrite patterns. 
The conversion behaves differently depending on the // conversion mode. @@ -3264,18 +3272,13 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { // After a successful conversion, apply rewrites. rewriterImpl.applyRewrites(); - // Gather all unresolved materializations. - SmallVector allCastOps; - const DenseMap - &materializations = rewriterImpl.unresolvedMaterializations; - for (auto it : materializations) - allCastOps.push_back(it.first); - // Reconcile all UnrealizedConversionCastOps that were inserted by the - // dialect conversion frameworks. (Not the one that were inserted by + // dialect conversion frameworks. (Not the ones that were inserted by // patterns.) + const DenseMap + &materializations = rewriterImpl.unresolvedMaterializations; SmallVector remainingCastOps; - reconcileUnrealizedCasts(allCastOps, &remainingCastOps); + reconcileUnrealizedCasts(materializations, &remainingCastOps); // Drop markers. for (UnrealizedConversionCastOp castOp : remainingCastOps) @@ -3303,20 +3306,19 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { // Reconcile Unrealized Casts //===----------------------------------------------------------------------===// -void mlir::reconcileUnrealizedCasts( - ArrayRef castOps, +/// Try to reconcile all given UnrealizedConversionCastOps and store the +/// left-over ops in `remainingCastOps` (if provided). See documentation in +/// DialectConversion.h for more details. +/// The `isCastOpOfInterestFn` is used to filter the cast ops to proceed: the +/// algorithm may visit an operand (or user) which is a cast op, but will not +/// try to reconcile it if not in the filtered set. +template +static void reconcileUnrealizedCastsImpl( + RangeT castOps, + function_ref isCastOpOfInterestFn, SmallVectorImpl *remainingCastOps) { + // A worklist of cast ops to process. SetVector worklist(llvm::from_range, castOps); - // This set is maintained only if `remainingCastOps` is provided. 
- DenseSet erasedOps; - - // Helper function that adds all operands to the worklist that are an - // unrealized_conversion_cast op result. - auto enqueueOperands = [&](UnrealizedConversionCastOp castOp) { - for (Value v : castOp.getInputs()) - if (auto inputCastOp = v.getDefiningOp()) - worklist.insert(inputCastOp); - }; // Helper function that return the unrealized_conversion_cast op that // defines all inputs of the given op (in the same order). Return "nullptr" @@ -3337,39 +3339,110 @@ void mlir::reconcileUnrealizedCasts( // Process ops in the worklist bottom-to-top. while (!worklist.empty()) { UnrealizedConversionCastOp castOp = worklist.pop_back_val(); - if (castOp->use_empty()) { - // DCE: If the op has no users, erase it. Add the operands to the - // worklist to find additional DCE opportunities. - enqueueOperands(castOp); - if (remainingCastOps) - erasedOps.insert(castOp.getOperation()); - castOp->erase(); - continue; - } // Traverse the chain of input cast ops to see if an op with the same // input types can be found. UnrealizedConversionCastOp nextCast = castOp; while (nextCast) { if (nextCast.getInputs().getTypes() == castOp.getResultTypes()) { + if (llvm::any_of(nextCast.getInputs(), [&](Value v) { + return v.getDefiningOp() == castOp; + })) { + // Ran into a cycle. + break; + } + // Found a cast where the input types match the output types of the - // matched op. We can directly use those inputs and the matched op can - // be removed. - enqueueOperands(castOp); + // matched op. We can directly use those inputs. castOp.replaceAllUsesWith(nextCast.getInputs()); - if (remainingCastOps) - erasedOps.insert(castOp.getOperation()); - castOp->erase(); break; } nextCast = getInputCast(nextCast); } } - if (remainingCastOps) - for (UnrealizedConversionCastOp op : castOps) - if (!erasedOps.contains(op.getOperation())) + // A set of all alive cast ops. I.e., ops whose results are (transitively) + // used by an op that is not a cast op. 
+ DenseSet liveOps; + + // Helper function that marks the given op and transitively reachable input + // cast ops as alive. + auto markOpLive = [&](Operation *rootOp) { + SmallVector worklist; + worklist.push_back(rootOp); + while (!worklist.empty()) { + Operation *op = worklist.pop_back_val(); + if (liveOps.insert(op).second) { + // Successfully inserted: process reachable input cast ops. + for (Value v : op->getOperands()) + if (auto castOp = v.getDefiningOp()) + if (isCastOpOfInterestFn(castOp)) + worklist.push_back(castOp); + } + } + }; + + // Find all alive cast ops. + for (UnrealizedConversionCastOp op : castOps) { + // The op may have been marked live already as being an operand of another + // live cast op. + if (liveOps.contains(op.getOperation())) + continue; + // If any of the users is not a cast op, mark the current op (and its + // input ops) as live. + if (llvm::any_of(op->getUsers(), [&](Operation *user) { + auto castOp = dyn_cast(user); + return !castOp || !isCastOpOfInterestFn(castOp); + })) + markOpLive(op); + } + + // Erase all dead cast ops. + for (UnrealizedConversionCastOp op : castOps) { + if (liveOps.contains(op)) { + // Op is alive and was not erased. Add it to the remaining cast ops. + if (remainingCastOps) remainingCastOps->push_back(op); + continue; + } + + // Op is dead. Erase it. + op->dropAllUses(); + op->erase(); + } +} + +void mlir::reconcileUnrealizedCasts( + ArrayRef castOps, + SmallVectorImpl *remainingCastOps) { + // Set of all cast ops for faster lookups. 
+ DenseSet castOpSet; + for (UnrealizedConversionCastOp op : castOps) + castOpSet.insert(op); + reconcileUnrealizedCasts(castOpSet, remainingCastOps); +} + +void mlir::reconcileUnrealizedCasts( + const DenseSet &castOps, + SmallVectorImpl *remainingCastOps) { + reconcileUnrealizedCastsImpl( + llvm::make_range(castOps.begin(), castOps.end()), + [&](UnrealizedConversionCastOp castOp) { + return castOps.contains(castOp); + }, + remainingCastOps); +} + +static void mlir::reconcileUnrealizedCasts( + const DenseMap + &castOps, + SmallVectorImpl *remainingCastOps) { + reconcileUnrealizedCastsImpl( + castOps.keys(), + [&](UnrealizedConversionCastOp castOp) { + return castOps.contains(castOp); + }, + remainingCastOps); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir index 3573114f5e038..ac5ca321c066f 100644 --- a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir +++ b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir @@ -194,3 +194,53 @@ func.func @emptyCast() -> index { %0 = builtin.unrealized_conversion_cast to index return %0 : index } + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %0 = builtin.unrealized_conversion_cast %2 : i32 to i64 + %1 = builtin.unrealized_conversion_cast %0 : i64 to i16 + %2 = builtin.unrealized_conversion_cast %1 : i16 to i32 + "test.return"() : () -> () +} + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: %[[cast0:.*]] = builtin.unrealized_conversion_cast %[[cast2:.*]] : i32 to i64 +// CHECK-NEXT: %[[cast1:.*]] = builtin.unrealized_conversion_cast %[[cast0]] : i64 to i16 +// CHECK-NEXT: %[[cast2]] = builtin.unrealized_conversion_cast %[[cast1]] : i16 to i32 +// CHECK-NEXT: "test.user"(%[[cast2]]) : (i32) -> 
() +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %0 = builtin.unrealized_conversion_cast %2 : i32 to i64 + %1 = builtin.unrealized_conversion_cast %0 : i64 to i16 + %2 = builtin.unrealized_conversion_cast %1 : i16 to i32 + "test.user"(%2) : (i32) -> () + "test.return"() : () -> () +} + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %0 = builtin.unrealized_conversion_cast %0 : i32 to i32 + "test.return"() : () -> () +} + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: %[[c0:.*]] = arith.constant +// CHECK-NEXT: %[[cast:.*]]:2 = builtin.unrealized_conversion_cast %[[c0]], %[[cast]]#1 : i32, i32 to i32, i32 +// CHECK-NEXT: "test.user"(%[[cast]]#0) : (i32) -> () +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %cst = arith.constant 0 : i32 + %0, %1 = builtin.unrealized_conversion_cast %cst, %1 : i32, i32 to i32, i32 + "test.user"(%0) : (i32) -> () + "test.return"() : () -> () +} diff --git a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir index 25a338df8d790..01a826a638606 100644 --- a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir @@ -1,7 +1,8 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -expand-strided-metadata \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm | \ +// RUN: -convert-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir index 4c6a48d577a6c..1144a7caf36e8 100644 --- 
a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm | \ +// RUN: -convert-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir index dd000c6904bcb..82e63805cd027 100644 --- a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm | \ +// RUN: -convert-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s From e8f61801c6237e56b3d69190af7a7acfdcede1e8 Mon Sep 17 00:00:00 2001 From: Christopher Di Bella Date: Fri, 12 Sep 2025 06:33:10 -0700 Subject: [PATCH 122/734] [libcxx] adds size-based `__split_buffer` representation to unstable ABI (#139632) **tl;dr** We can significantly improve the runtime performance of `std::vector` by changing its representation from three pointers to one pointer and two integers. This document explains the details of this change, along with the justifications for making it. See the [RFC] for more information. `vector` depends on `__split_buffer` for inserting elements. Changing `__split_buffer` to match `vector`'s representation simplifies the model, as it eliminates the need to convert between two different representations of a contiguous buffer in the same configuration of libc++. 
[RFC]: https://discourse.llvm.org/t/adding-a-size-based-vector-to-libc-s-unstable-abi/86306 --------- Co-authored-by: Jorge Gorbe Moya --- libcxx/include/__split_buffer | 840 +++++++++++++----- libcxx/include/__vector/vector.h | 67 +- libcxx/include/deque | 37 +- .../is_replaceable.compile.pass.cpp | 58 +- .../is_trivially_relocatable.compile.pass.cpp | 24 +- lldb/examples/synthetic/libcxx.py | 78 +- 6 files changed, 805 insertions(+), 299 deletions(-) diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index 21e58f4abc6b3..15368a3bc8955 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -13,10 +13,12 @@ #include <__algorithm/max.h> #include <__algorithm/move.h> #include <__algorithm/move_backward.h> +#include <__assert> #include <__config> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/move_iterator.h> +#include <__memory/addressof.h> #include <__memory/allocate_at_least.h> #include <__memory/allocator.h> #include <__memory/allocator_traits.h> @@ -45,25 +47,434 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// __split_buffer allocates a contiguous chunk of memory and stores objects in the range [__begin_, __end_). -// It has uninitialized memory in the ranges [__first_, __begin_) and [__end_, __cap_). That allows -// it to grow both in the front and back without having to move the data. 
+template class _Layout> +class __split_buffer; + +template +class __split_buffer_pointer_layout { +protected: + using value_type = _Tp; + using allocator_type = _Allocator; + using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = typename __alloc_traits::size_type; + using difference_type = typename __alloc_traits::difference_type; + using pointer = typename __alloc_traits::pointer; + using const_pointer = typename __alloc_traits::const_pointer; + using iterator = pointer; + using const_iterator = const_pointer; + using __sentinel_type _LIBCPP_NODEBUG = pointer; -template > -struct __split_buffer { public: - using value_type = _Tp; - using allocator_type = _Allocator; - using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t; - using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>; - using reference = value_type&; - using const_reference = const value_type&; - using size_type = typename __alloc_traits::size_type; - using difference_type = typename __alloc_traits::difference_type; - using pointer = typename __alloc_traits::pointer; - using const_pointer = typename __alloc_traits::const_pointer; - using iterator = pointer; - using const_iterator = const_pointer; + // Can't be defaulted due to _LIBCPP_COMPRESSED_PAIR not being an aggregate in C++03 and C++11. 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_pointer_layout() : __back_cap_(nullptr) {} + + _LIBCPP_CONSTEXPR_SINCE_CXX20 + _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_pointer_layout(const allocator_type& __alloc) + : __back_cap_(nullptr), __alloc_(__alloc) {} + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT { + return __front_cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __end_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __end_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { + return static_cast(__end_ - __begin_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __begin_ == __end_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT { + return static_cast(__back_cap_ - __front_cap_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT { + return __alloc_; + } + + // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction, + // not explicit types. 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT { + return __end_; + } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT { + return __back_cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT { + __front_cap_ = __new_first; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT { + __begin_ = __new_begin; + __end_ = __new_end; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT { + __begin_ = __new_begin; + __end_ = __begin_ + __new_size; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT { + _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_"); + __end_ = __new_end; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT { + __end_ = __begin_ + __new_size; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT { + __back_cap_ = __front_cap_ + __new_capacity; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT { + __back_cap_ = __new_capacity; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT { + return static_cast(__begin_ - __front_cap_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT { + return static_cast(__back_cap_ - __end_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return *(__end_ - 1); } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { return *(__end_ - 1); } + 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator( + __split_buffer_pointer_layout<__split_buffer, + value_type, + __alloc_rr&>& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__back_cap_, __other.__back_cap_); + std::swap(__end_, __other.__end_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_pointer_layout& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__back_cap_, __other.__back_cap_); + std::swap(__end_, __other.__end_); + std::__swap_allocator(__alloc_, __other.__alloc_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT { + __front_cap_ = nullptr; + __begin_ = nullptr; + __end_ = nullptr; + __back_cap_ = nullptr; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __copy_without_alloc(__split_buffer_pointer_layout const& __other) + _NOEXCEPT_(is_nothrow_copy_assignable::value) { + __front_cap_ = __other.__front_cap_; + __begin_ = __other.__begin_; + __end_ = __other.__end_; + __back_cap_ = __other.__back_cap_; + } + +private: + pointer __front_cap_ = nullptr; + pointer __begin_ = nullptr; + pointer __end_ = nullptr; + _LIBCPP_COMPRESSED_PAIR(pointer, __back_cap_, allocator_type, __alloc_); + + template + friend class __split_buffer_pointer_layout; +}; + +template +class __split_buffer_size_layout { +protected: + using value_type = _Tp; + using allocator_type = _Allocator; + using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = typename __alloc_traits::size_type; + using difference_type = typename __alloc_traits::difference_type; + using pointer = typename __alloc_traits::pointer; + using const_pointer = typename 
__alloc_traits::const_pointer; + using iterator = pointer; + using const_iterator = const_pointer; + using __sentinel_type _LIBCPP_NODEBUG = size_type; + +public: + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_size_layout() = default; + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_size_layout(const allocator_type& __alloc) + : __alloc_(__alloc) {} + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT { + return __front_cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __begin_ + __size_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __begin_ + __size_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __size_ == 0; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT { return __cap_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT { + return __alloc_; + } + + // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction, + // not explicit types. 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT { + return __size_; + } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT { + return __cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT { + __front_cap_ = __new_first; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT { + // Size-based __split_buffers track their size directly: we need to explicitly update the size + // when the front is adjusted. + __size_ -= __new_begin - __begin_; + __begin_ = __new_begin; + __set_sentinel(__new_end); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT { + // Size-based __split_buffers track their size directly: we need to explicitly update the size + // when the front is adjusted. 
+ __size_ -= __new_begin - __begin_; + __begin_ = __new_begin; + __set_sentinel(__new_size); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT { + _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_"); + __size_ += __new_end - end(); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT { + __size_ = __new_size; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT { + __cap_ = __new_capacity; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT { + __cap_ = __new_capacity - __begin_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT { + return static_cast(__begin_ - __front_cap_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT { + // `__cap_ - __end_` tells us the total number of spares when in size-mode. We need to remove + // the __front_spare from the count. 
+ return __cap_ - __size_ - __front_spare(); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return __begin_[__size_ - 1]; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { + return __begin_[__size_ - 1]; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator( + __split_buffer_pointer_layout<__split_buffer, + value_type, + __alloc_rr&>& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__cap_, __other.__cap_); + std::swap(__size_, __other.__size_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_size_layout& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__cap_, __other.__cap_); + std::swap(__size_, __other.__size_); + std::__swap_allocator(__alloc_, __other.__alloc_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT { + __front_cap_ = nullptr; + __begin_ = nullptr; + __size_ = 0; + __cap_ = 0; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __copy_without_alloc(__split_buffer_size_layout const& __other) + _NOEXCEPT_(is_nothrow_copy_assignable::value) { + __front_cap_ = __other.__front_cap_; + __begin_ = __other.__begin_; + __cap_ = __other.__cap_; + __size_ = __other.__size_; + } + +private: + pointer __front_cap_ = nullptr; + pointer __begin_ = nullptr; + size_type __size_ = 0; + size_type __cap_ = 0; + _LIBCPP_NO_UNIQUE_ADDRESS allocator_type __alloc_; + + template + friend class __split_buffer_size_layout; +}; + +// `__split_buffer` is a contiguous array data structure. It may hold spare capacity at both ends of +// the sequence. This allows for a `__split_buffer` to grow from both the front and the back without +// relocating its contents until it runs out of room. 
This characteristic sets it apart from +// `std::vector`, which only holds spare capacity at its end. As such, `__split_buffer` is useful +// for implementing both `std::vector` and `std::deque`. +// +// The sequence is stored as a contiguous chunk of memory delimited by the following "pointers" (`o` denotes +// uninitialized memory and `x` denotes a valid object): +// +// |oooooooooooooooooooxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxoooooooooooooooooooooooo| +// ^ ^ ^ ^ +// __front_cap_ __begin_ __end_ __back_cap_ +// +// The range [__front_cap_, __begin_) contains uninitialized memory. It is referred to as the "front spare capacity". +// The range [__begin_, __end_) contains valid objects. It is referred to as the "valid range". +// The range [__end_, __back_cap_) contains uninitialized memory. It is referred to as the "back spare capacity". +// +// The layout of `__split_buffer` is determined by the `_Layout` template template parameter. This +// `_Layout` allows the above pointers to be stored as different representations, such as integer +// offsets. 
A layout class template must provide the following interface: +// +// template +// class __layout { +// protected: +// using value_type = _Tp; +// using allocator_type = _Allocator; +// using __alloc_rr = __libcpp_remove_reference_t; +// using __alloc_traits = allocator_traits<__alloc_rr>; +// using reference = value_type&; +// using const_reference = const value_type&; +// using size_type = typename __alloc_traits::size_type; +// using difference_type = typename __alloc_traits::difference_type; +// using pointer = typename __alloc_traits::pointer; +// using const_pointer = typename __alloc_traits::const_pointer; +// using iterator = pointer; +// using const_iterator = const_pointer; +// using __sentinel_type = /* type that represents the layout's sentinel */; +// +// public: +// __layout() = default; +// explicit __layout(const allocator_type&); +// +// pointer __front_cap(); +// const_pointer __front_cap() const; +// +// pointer begin(); +// const_pointer begin() const; +// +// pointer end(); +// pointer end() const; +// +// size_type size() const; +// bool empty() const; +// size_type capacity() const; +// +// allocator_type& __get_allocator(); +// allocator_type const& __get_allocator() const; +// +// __sentinel_type __raw_sentinel() const; +// __sentinel_type __raw_capacity() const; +// +// void __set_data(pointer); +// void __set_valid_range(pointer __begin, pointer __end); +// void __set_valid_range(pointer __begin, size_type __size); +// void __set_sentinel(pointer __end); +// void __set_sentinel(size_type __size); +// +// void __set_capacity(size_type __capacity); +// void __set_capacity(pointer __capacity); +// +// size_type __front_spare() const; +// size_type __back_spare() const; +// +// reference back(); +// const_reference back() const; +// +// template +// void __swap_without_allocator(_OtherLayout&); +// void swap(__layout&); +// +// void __reset(); +// void __copy_without_alloc(__layout const&); +// }; +// +template class _Layout> +class 
__split_buffer : _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, _Allocator> { + using __base_type _LIBCPP_NODEBUG = _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, _Allocator>; + +public: + using __base_type::__back_spare; + using __base_type::__copy_without_alloc; + using __base_type::__front_cap; + using __base_type::__front_spare; + using __base_type::__get_allocator; + using __base_type::__raw_capacity; + using __base_type::__raw_sentinel; + using __base_type::__reset; + using __base_type::__set_capacity; + using __base_type::__set_data; + using __base_type::__set_sentinel; + using __base_type::__set_valid_range; + + using typename __base_type::__alloc_rr; + using typename __base_type::__alloc_traits; + using typename __base_type::allocator_type; + using typename __base_type::const_iterator; + using typename __base_type::const_pointer; + using typename __base_type::const_reference; + using typename __base_type::difference_type; + using typename __base_type::iterator; + using typename __base_type::pointer; + using typename __base_type::reference; + using typename __base_type::size_type; + using typename __base_type::value_type; // A __split_buffer contains the following members which may be trivially relocatable: // - pointer: may be trivially relocatable, so it's checked @@ -78,23 +489,15 @@ public: __split_buffer, void>; - pointer __first_; - pointer __begin_; - pointer __end_; - _LIBCPP_COMPRESSED_PAIR(pointer, __cap_, allocator_type, __alloc_); - __split_buffer(const __split_buffer&) = delete; __split_buffer& operator=(const __split_buffer&) = delete; - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer() - _NOEXCEPT_(is_nothrow_default_constructible::value) - : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr) {} + _LIBCPP_HIDE_FROM_ABI __split_buffer() = default; - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(__alloc_rr& __a) - : __first_(nullptr), __begin_(nullptr), 
__end_(nullptr), __cap_(nullptr), __alloc_(__a) {} + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(__alloc_rr& __a) : __base_type(__a) {} _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const __alloc_rr& __a) - : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr), __alloc_(__a) {} + : __base_type(__a) {} _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(size_type __cap, size_type __start, __alloc_rr& __a); @@ -111,36 +514,16 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~__split_buffer(); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __begin_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __begin_; } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __end_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __end_; } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(__begin_); } + using __base_type::back; + using __base_type::begin; + using __base_type::capacity; + using __base_type::empty; + using __base_type::end; + using __base_type::size; - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const { - return static_cast(__end_ - __begin_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const { return __end_ == __begin_; } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const { - return static_cast(__cap_ - __first_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const { - return static_cast(__begin_ - __first_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const { - return static_cast(__cap_ - __end_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI 
reference front() { return *__begin_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *__begin_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() { return *(__end_ - 1); } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const { return *(__end_ - 1); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(begin()); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() { return *begin(); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *begin(); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void shrink_to_fit() _NOEXCEPT; @@ -149,8 +532,8 @@ public: template _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void emplace_back(_Args&&... __args); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(__begin_ + 1); } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(__end_ - 1); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(begin() + 1); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(end() - 1); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n, const_reference __x); @@ -184,242 +567,240 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer& __x) _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const; + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const { + if (__front_cap() == nullptr) { + if (begin() != nullptr) + return false; + + if (!empty()) + return false; + + 
if (capacity() != 0) + return false; + + return true; + } else { + if (begin() < __front_cap()) + return false; + + if (capacity() < size()) + return false; + + if (end() < begin()) + return false; + + return true; + } + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __swap_without_allocator(__split_buffer& __other) _NOEXCEPT { + __base_type::__swap_without_allocator(__other); + } private: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer& __c, true_type) _NOEXCEPT_(is_nothrow_move_assignable::value) { - __alloc_ = std::move(__c.__alloc_); + __get_allocator() = std::move(__c.__get_allocator()); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer&, false_type) _NOEXCEPT {} struct _ConstructTransaction { _LIBCPP_CONSTEXPR_SINCE_CXX20 - _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(pointer* __p, size_type __n) _NOEXCEPT - : __pos_(*__p), - __end_(*__p + __n), - __dest_(__p) {} + _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(__split_buffer* __parent, pointer __p, size_type __n) _NOEXCEPT + : __pos_(__p), + __end_(__p + __n), + __parent_(__parent) {} - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { *__dest_ = __pos_; } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { __parent_->__set_sentinel(__pos_); } pointer __pos_; const pointer __end_; private: - pointer* __dest_; + __split_buffer* __parent_; }; -}; -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 bool __split_buffer<_Tp, _Allocator>::__invariants() const { - if (__first_ == nullptr) { - if (__begin_ != nullptr) - return false; - if (__end_ != nullptr) - return false; - if (__cap_ != nullptr) - return false; - } else { - if (__begin_ < __first_) - return false; - if (__end_ < __begin_) - return false; - if (__cap_ < __end_) - return false; - } - return true; -} + template class _L2> + friend class __split_buffer; +}; -// Default constructs __n 
objects starting at __end_ +// Default constructs __n objects starting at `end()` // throws if construction throws // Precondition: __n > 0 // Precondition: size() + __n <= capacity() // Postcondition: size() == size() + __n -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n) { - _ConstructTransaction __tx(std::addressof(this->__end_), __n); +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n) { + _ConstructTransaction __tx(this, end(), __n); for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) { - __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_)); + __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_)); } } -// Copy constructs __n objects starting at __end_ from __x +// Copy constructs __n objects starting at `end()` from __x // throws if construction throws // Precondition: __n > 0 // Precondition: size() + __n <= capacity() // Postcondition: size() == old size() + __n // Postcondition: [i] == __x for all i in [size() - __n, __n) -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x) { - _ConstructTransaction __tx(std::addressof(this->__end_), __n); +__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n, const_reference __x) { + _ConstructTransaction __tx(this, end(), __n); for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) { - __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), __x); + __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), __x); } } -template +template class _Layout> template _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) { - __alloc_rr& __a = __alloc_; +__split_buffer<_Tp, _Allocator, 
_Layout>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) { + __alloc_rr& __a = __get_allocator(); for (; __first != __last; ++__first) { - if (__end_ == __cap_) { - size_type __old_cap = __cap_ - __first_; + if (__back_spare() == 0) { + size_type __old_cap = capacity(); size_type __new_cap = std::max(2 * __old_cap, 8); __split_buffer __buf(__new_cap, 0, __a); - for (pointer __p = __begin_; __p != __end_; ++__p, (void)++__buf.__end_) - __alloc_traits::construct(__buf.__alloc_, std::__to_address(__buf.__end_), std::move(*__p)); + pointer __buf_end = __buf.end(); + pointer __end = end(); + for (pointer __p = begin(); __p != __end; ++__p) { + __alloc_traits::construct(__buf.__get_allocator(), std::__to_address(__buf_end), std::move(*__p)); + __buf.__set_sentinel(++__buf_end); + } swap(__buf); } - __alloc_traits::construct(__a, std::__to_address(this->__end_), *__first); - ++this->__end_; + + __alloc_traits::construct(__a, std::__to_address(end()), *__first); + __set_sentinel(size() + 1); } } -template + +template class _Layout> template ::value, int> > _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) { +__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) { __construct_at_end_with_size(__first, std::distance(__first, __last)); } -template +template class _Layout> template _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) { - _ConstructTransaction __tx(std::addressof(this->__end_), __n); +__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) { + _ConstructTransaction __tx(this, end(), __n); for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, (void)++__first) { - __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), *__first); + 
__alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), *__first); } } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline void -__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, false_type) { - while (__begin_ != __new_begin) - __alloc_traits::destroy(__alloc_, std::__to_address(__begin_++)); +__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, false_type) { + pointer __begin = begin(); + // Updating begin at every iteration is unnecessary because destruction can't throw. + while (__begin != __new_begin) + __alloc_traits::destroy(__get_allocator(), std::__to_address(__begin++)); + __set_valid_range(__begin, end()); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline void -__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, true_type) { - __begin_ = __new_begin; -} - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void -__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT { - while (__new_last != __end_) - __alloc_traits::destroy(__alloc_, std::__to_address(--__end_)); +__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, true_type) { + __set_valid_range(__new_begin, end()); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void -__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, true_type) _NOEXCEPT { - __end_ = __new_last; +__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT { + pointer __end = end(); + // Updating begin at every iteration is unnecessary because destruction can't throw. 
+ while (__new_last != __end) + __alloc_traits::destroy(__get_allocator(), std::__to_address(--__end)); + __set_sentinel(__end); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 -__split_buffer<_Tp, _Allocator>::__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a) - : __cap_(nullptr), __alloc_(__a) { - if (__cap == 0) { - __first_ = nullptr; - } else { - auto __allocation = std::__allocate_at_least(__alloc_, __cap); - __first_ = __allocation.ptr; - __cap = __allocation.count; +__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a) + : __base_type(__a) { + _LIBCPP_ASSERT_INTERNAL(__cap >= __start, "can't have a start point outside the capacity"); + if (__cap > 0) { + auto __allocation = std::__allocate_at_least(__get_allocator(), __cap); + __set_data(__allocation.ptr); + __cap = __allocation.count; } - __begin_ = __end_ = __first_ + __start; - __cap_ = __first_ + __cap; + + pointer __begin = __front_cap() + __start; + __set_valid_range(__begin, __begin); + __set_capacity(__cap); } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::~__split_buffer() { +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::~__split_buffer() { clear(); - if (__first_) - __alloc_traits::deallocate(__alloc_, __first_, capacity()); + if (__front_cap()) + __alloc_traits::deallocate(__get_allocator(), __front_cap(), capacity()); } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c) +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c) _NOEXCEPT_(is_nothrow_move_constructible::value) - : __first_(std::move(__c.__first_)), - __begin_(std::move(__c.__begin_)), - __end_(std::move(__c.__end_)), - __cap_(std::move(__c.__cap_)), - __alloc_(std::move(__c.__alloc_)) { - __c.__first_ = nullptr; - __c.__begin_ = 
nullptr; - __c.__end_ = nullptr; - __c.__cap_ = nullptr; + : __base_type(std::move(__c)) { + __c.__reset(); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 -__split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c, const __alloc_rr& __a) - : __cap_(nullptr), __alloc_(__a) { - if (__a == __c.__alloc_) { - __first_ = __c.__first_; - __begin_ = __c.__begin_; - __end_ = __c.__end_; - __cap_ = __c.__cap_; - __c.__first_ = nullptr; - __c.__begin_ = nullptr; - __c.__end_ = nullptr; - __c.__cap_ = nullptr; +__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c, const __alloc_rr& __a) + : __base_type(__a) { + if (__a == __c.__get_allocator()) { + __set_data(__c.__front_cap()); + __set_valid_range(__c.begin(), __c.end()); + __set_capacity(__c.capacity()); + __c.__reset(); } else { - auto __allocation = std::__allocate_at_least(__alloc_, __c.size()); - __first_ = __allocation.ptr; - __begin_ = __end_ = __first_; - __cap_ = __first_ + __allocation.count; + auto __allocation = std::__allocate_at_least(__get_allocator(), __c.size()); + __set_data(__allocation.ptr); + __set_valid_range(__front_cap(), __front_cap()); + __set_capacity(__allocation.count); typedef move_iterator _Ip; __construct_at_end(_Ip(__c.begin()), _Ip(__c.end())); } } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>& -__split_buffer<_Tp, _Allocator>::operator=(__split_buffer&& __c) +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>& +__split_buffer<_Tp, _Allocator, _Layout>::operator=(__split_buffer&& __c) _NOEXCEPT_((__alloc_traits::propagate_on_container_move_assignment::value && is_nothrow_move_assignable::value) || !__alloc_traits::propagate_on_container_move_assignment::value) { clear(); shrink_to_fit(); - __first_ = __c.__first_; - __begin_ = __c.__begin_; - __end_ = __c.__end_; - __cap_ = __c.__cap_; + __copy_without_alloc(__c); __move_assign_alloc(__c, integral_constant()); - 
__c.__first_ = __c.__begin_ = __c.__end_ = __c.__cap_ = nullptr; + __c.__reset(); return *this; } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::swap(__split_buffer& __x) +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::swap(__split_buffer& __x) _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>) { - std::swap(__first_, __x.__first_); - std::swap(__begin_, __x.__begin_); - std::swap(__end_, __x.__end_); - std::swap(__cap_, __x.__cap_); - std::__swap_allocator(__alloc_, __x.__alloc_); + __base_type::swap(__x); } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fit() _NOEXCEPT { +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::shrink_to_fit() _NOEXCEPT { if (capacity() > size()) { #if _LIBCPP_HAS_EXCEPTIONS try { #endif // _LIBCPP_HAS_EXCEPTIONS - __split_buffer __t(size(), 0, __alloc_); + __split_buffer __t(size(), 0, __get_allocator()); if (__t.capacity() < capacity()) { - __t.__construct_at_end(move_iterator(__begin_), move_iterator(__end_)); - __t.__end_ = __t.__begin_ + (__end_ - __begin_); - std::swap(__first_, __t.__first_); - std::swap(__begin_, __t.__begin_); - std::swap(__end_, __t.__end_); - std::swap(__cap_, __t.__cap_); + __t.__construct_at_end(move_iterator(begin()), move_iterator(end())); + __t.__set_sentinel(size()); + __swap_without_allocator(__t); } #if _LIBCPP_HAS_EXCEPTIONS } catch (...) { @@ -428,55 +809,56 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fi } } -template +template class _Layout> template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_front(_Args&&... 
__args) { - if (__begin_ == __first_) { - if (__end_ < __cap_) { - difference_type __d = __cap_ - __end_; +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_front(_Args&&... __args) { + if (__front_spare() == 0) { + pointer __end = end(); + if (__back_spare() > 0) { + // The elements are pressed up against the front of the buffer: we need to move them back a + // little bit to make `emplace_front` have amortised O(1) complexity. + difference_type __d = __back_spare(); __d = (__d + 1) / 2; - __begin_ = std::move_backward(__begin_, __end_, __end_ + __d); - __end_ += __d; + auto __new_end = __end + __d; + __set_valid_range(std::move_backward(begin(), __end, __new_end), __new_end); } else { - size_type __c = std::max(2 * static_cast(__cap_ - __first_), 1); - __split_buffer __t(__c, (__c + 3) / 4, __alloc_); - __t.__construct_at_end(move_iterator(__begin_), move_iterator(__end_)); - std::swap(__first_, __t.__first_); - std::swap(__begin_, __t.__begin_); - std::swap(__end_, __t.__end_); - std::swap(__cap_, __t.__cap_); + size_type __c = std::max(2 * capacity(), 1); + __split_buffer __t(__c, (__c + 3) / 4, __get_allocator()); + __t.__construct_at_end(move_iterator(begin()), move_iterator(__end)); + __base_type::__swap_without_allocator(__t); } } - __alloc_traits::construct(__alloc_, std::__to_address(__begin_ - 1), std::forward<_Args>(__args)...); - --__begin_; + + __alloc_traits::construct(__get_allocator(), std::__to_address(begin() - 1), std::forward<_Args>(__args)...); + __set_valid_range(begin() - 1, size() + 1); } -template +template class _Layout> template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_back(_Args&&... __args) { - if (__end_ == __cap_) { - if (__begin_ > __first_) { - difference_type __d = __begin_ - __first_; +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_back(_Args&&... 
__args) { + pointer __end = end(); + if (__back_spare() == 0) { + if (__front_spare() > 0) { + difference_type __d = __front_spare(); __d = (__d + 1) / 2; - __end_ = std::move(__begin_, __end_, __begin_ - __d); - __begin_ -= __d; + __end = std::move(begin(), __end, begin() - __d); + __set_valid_range(begin() - __d, __end); } else { - size_type __c = std::max(2 * static_cast(__cap_ - __first_), 1); - __split_buffer __t(__c, __c / 4, __alloc_); - __t.__construct_at_end(move_iterator(__begin_), move_iterator(__end_)); - std::swap(__first_, __t.__first_); - std::swap(__begin_, __t.__begin_); - std::swap(__end_, __t.__end_); - std::swap(__cap_, __t.__cap_); + size_type __c = std::max(2 * capacity(), 1); + __split_buffer __t(__c, __c / 4, __get_allocator()); + __t.__construct_at_end(move_iterator(begin()), move_iterator(__end)); + __base_type::__swap_without_allocator(__t); } } - __alloc_traits::construct(__alloc_, std::__to_address(__end_), std::forward<_Args>(__args)...); - ++__end_; + + __alloc_traits::construct(__get_allocator(), std::__to_address(__end), std::forward<_Args>(__args)...); + __set_sentinel(++__end); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void -swap(__split_buffer<_Tp, _Allocator>& __x, __split_buffer<_Tp, _Allocator>& __y) _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) { +swap(__split_buffer<_Tp, _Allocator, _Layout>& __x, __split_buffer<_Tp, _Allocator, _Layout>& __y) + _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) { __x.swap(__y); } diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h index 5a3c13189d52f..27e681aeef22a 100644 --- a/libcxx/include/__vector/vector.h +++ b/libcxx/include/__vector/vector.h @@ -86,6 +86,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD template */> class vector { + template + using __split_buffer _LIBCPP_NODEBUG = std::__split_buffer<_Up, _Alloc, __split_buffer_pointer_layout>; + public: // // Types @@ -820,6 +823,24 @@ class vector { __add_alignment_assumption(_Ptr __p) 
_NOEXCEPT { return __p; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_layouts(__split_buffer<_Tp, allocator_type&>& __sb) { + auto __vector_begin = __begin_; + auto __vector_sentinel = __end_; + auto __vector_cap = __cap_; + + auto __sb_begin = __sb.begin(); + auto __sb_sentinel = __sb.__raw_sentinel(); + auto __sb_cap = __sb.__raw_capacity(); + + // TODO: replace with __set_valid_range and __set_capacity when vector supports it. + __begin_ = __sb_begin; + __end_ = __sb_sentinel; + __cap_ = __sb_cap; + + __sb.__set_valid_range(__vector_begin, __vector_sentinel); + __sb.__set_capacity(__vector_cap); + } }; #if _LIBCPP_STD_VER >= 17 @@ -850,15 +871,14 @@ template _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v) { __annotate_delete(); - auto __new_begin = __v.__begin_ - (__end_ - __begin_); + auto __new_begin = __v.begin() - size(); std::__uninitialized_allocator_relocate( this->__alloc_, std::__to_address(__begin_), std::__to_address(__end_), std::__to_address(__new_begin)); - __v.__begin_ = __new_begin; + __v.__set_valid_range(__new_begin, __v.end()); __end_ = __begin_; // All the objects have been destroyed by relocating them. - std::swap(this->__begin_, __v.__begin_); - std::swap(this->__end_, __v.__end_); - std::swap(this->__cap_, __v.__cap_); - __v.__first_ = __v.__begin_; + + __swap_layouts(__v); + __v.__set_data(__v.begin()); __annotate_new(size()); } @@ -870,25 +890,23 @@ template _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v, pointer __p) { __annotate_delete(); - pointer __ret = __v.__begin_; + pointer __ret = __v.begin(); // Relocate [__p, __end_) first to avoid having a hole in [__begin_, __end_) // in case something in [__begin_, __p) throws. 
std::__uninitialized_allocator_relocate( - this->__alloc_, std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.__end_)); - __v.__end_ += (__end_ - __p); + this->__alloc_, std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.end())); + auto __relocated_so_far = __end_ - __p; + __v.__set_sentinel(__v.end() + __relocated_so_far); __end_ = __p; // The objects in [__p, __end_) have been destroyed by relocating them. - auto __new_begin = __v.__begin_ - (__p - __begin_); + auto __new_begin = __v.begin() - (__p - __begin_); std::__uninitialized_allocator_relocate( this->__alloc_, std::__to_address(__begin_), std::__to_address(__p), std::__to_address(__new_begin)); - __v.__begin_ = __new_begin; - __end_ = __begin_; // All the objects have been destroyed by relocating them. - - std::swap(this->__begin_, __v.__begin_); - std::swap(this->__end_, __v.__end_); - std::swap(this->__cap_, __v.__cap_); - __v.__first_ = __v.__begin_; + __v.__set_valid_range(__new_begin, __v.end()); + __end_ = __begin_; // All the objects have been destroyed by relocating them. + __swap_layouts(__v); + __v.__set_data(__v.begin()); __annotate_new(size()); return __ret; } @@ -1136,8 +1154,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... 
__args) { __split_buffer __v(__recommend(size() + 1), size(), this->__alloc_); // __v.emplace_back(std::forward<_Args>(__args)...); - __alloc_traits::construct(this->__alloc_, std::__to_address(__v.__end_), std::forward<_Args>(__args)...); - __v.__end_++; + pointer __end = __v.end(); + __alloc_traits::construct(this->__alloc_, std::__to_address(__end), std::forward<_Args>(__args)...); + __v.__set_sentinel(++__end); __swap_out_circular_buffer(__v); return this->__end_; } @@ -1332,14 +1351,14 @@ vector<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __position, _Inpu __split_buffer __merged( __recommend(size() + __v.size()), __off, __alloc_); // has `__off` positions available at the front std::__uninitialized_allocator_relocate( - __alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.__end_)); + __alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.end())); __guard.__complete(); // Release the guard once objects in [__old_last_, __end_) have been successfully relocated. 
- __merged.__end_ += this->__end_ - __old_last; + __merged.__set_sentinel(__merged.end() + (this->__end_ - __old_last)); this->__end_ = __old_last; std::__uninitialized_allocator_relocate( - __alloc_, std::__to_address(__v.__begin_), std::__to_address(__v.__end_), std::__to_address(__merged.__end_)); - __merged.__end_ += __v.size(); - __v.__end_ = __v.__begin_; + __alloc_, std::__to_address(__v.begin()), std::__to_address(__v.end()), std::__to_address(__merged.end())); + __merged.__set_sentinel(__merged.size() + __v.size()); + __v.__set_sentinel(__v.begin()); __p = __swap_out_circular_buffer(__merged, __p); } return __make_iter(__p); diff --git a/libcxx/include/deque b/libcxx/include/deque index 395a1076fd3c4..98d1dbbddb7e8 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -487,6 +487,9 @@ const _DiffType __deque_iterator<_ValueType, _Pointer, _Reference, _MapPointer, template */> class deque { + template + using __split_buffer _LIBCPP_NODEBUG = std::__split_buffer<_Up, _Alloc, __split_buffer_pointer_layout>; + public: // types: @@ -1238,8 +1241,8 @@ private: clear(); shrink_to_fit(); } - __alloc() = __c.__alloc(); - __map_.__alloc_ = __c.__map_.__alloc_; + __alloc() = __c.__alloc(); + __map_.__get_allocator() = __c.__map_.__get_allocator(); } _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const deque&, false_type) {} @@ -1318,7 +1321,7 @@ deque<_Tp, _Allocator>::deque(const deque& __c) : __map_(__pointer_allocator(__alloc_traits::select_on_container_copy_construction(__c.__alloc()))), __start_(0), __size_(0), - __alloc_(__map_.__alloc_) { + __alloc_(__map_.__get_allocator()) { __annotate_new(0); __append(__c.begin(), __c.end()); } @@ -2071,7 +2074,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity() { // Else need to allocate 1 buffer, *and* we need to reallocate __map_. 
else { __split_buffer __buf( - std::max(2 * __map_.capacity(), 1), 0, __map_.__alloc_); + std::max(2 * __map_.capacity(), 1), 0, __map_.__get_allocator()); typedef __allocator_destructor<_Allocator> _Dp; unique_ptr __hold(__alloc_traits::allocate(__a, __block_size), _Dp(__a, __block_size)); @@ -2080,10 +2083,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity() { for (__map_pointer __i = __map_.begin(); __i != __map_.end(); ++__i) __buf.emplace_back(*__i); - std::swap(__map_.__first_, __buf.__first_); - std::swap(__map_.__begin_, __buf.__begin_); - std::swap(__map_.__end_, __buf.__end_); - std::swap(__map_.__cap_, __buf.__cap_); + __map_.__swap_without_allocator(__buf); __start_ = __map_.size() == 1 ? __block_size / 2 : __start_ + __block_size; } __annotate_whole_block(0, __asan_poison); @@ -2134,7 +2134,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity(size_type __n) { else { size_type __ds = (__nb + __back_capacity) * __block_size - __map_.empty(); __split_buffer __buf( - std::max(2 * __map_.capacity(), __nb + __map_.size()), 0, __map_.__alloc_); + std::max(2 * __map_.capacity(), __nb + __map_.size()), 0, __map_.__get_allocator()); # if _LIBCPP_HAS_EXCEPTIONS try { # endif // _LIBCPP_HAS_EXCEPTIONS @@ -2157,10 +2157,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity(size_type __n) { } for (__map_pointer __i = __map_.begin(); __i != __map_.end(); ++__i) __buf.emplace_back(*__i); - std::swap(__map_.__first_, __buf.__first_); - std::swap(__map_.__begin_, __buf.__begin_); - std::swap(__map_.__end_, __buf.__end_); - std::swap(__map_.__cap_, __buf.__cap_); + __map_.__swap_without_allocator(__buf); __start_ += __ds; } } @@ -2194,7 +2191,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity() { // Else need to allocate 1 buffer, *and* we need to reallocate __map_. 
else { __split_buffer __buf( - std::max(2 * __map_.capacity(), 1), __map_.size(), __map_.__alloc_); + std::max(2 * __map_.capacity(), 1), __map_.size(), __map_.__get_allocator()); typedef __allocator_destructor<_Allocator> _Dp; unique_ptr __hold(__alloc_traits::allocate(__a, __block_size), _Dp(__a, __block_size)); @@ -2203,10 +2200,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity() { for (__map_pointer __i = __map_.end(); __i != __map_.begin();) __buf.emplace_front(*--__i); - std::swap(__map_.__first_, __buf.__first_); - std::swap(__map_.__begin_, __buf.__begin_); - std::swap(__map_.__end_, __buf.__end_); - std::swap(__map_.__cap_, __buf.__cap_); + __map_.__swap_without_allocator(__buf); __annotate_whole_block(__map_.size() - 1, __asan_poison); } } @@ -2259,7 +2253,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity(size_type __n) { __split_buffer __buf( std::max(2 * __map_.capacity(), __nb + __map_.size()), __map_.size() - __front_capacity, - __map_.__alloc_); + __map_.__get_allocator()); # if _LIBCPP_HAS_EXCEPTIONS try { # endif // _LIBCPP_HAS_EXCEPTIONS @@ -2282,10 +2276,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity(size_type __n) { } for (__map_pointer __i = __map_.end(); __i != __map_.begin();) __buf.emplace_front(*--__i); - std::swap(__map_.__first_, __buf.__first_); - std::swap(__map_.__begin_, __buf.__begin_); - std::swap(__map_.__end_, __buf.__end_); - std::swap(__map_.__cap_, __buf.__cap_); + __map_.__swap_without_allocator(__buf); __start_ -= __ds; } } diff --git a/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp b/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp index 546240a6c3286..c04e9443c8e67 100644 --- a/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp +++ b/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp @@ -133,16 +133,58 @@ static_assert(!std::__is_replaceable::value, ""); // ---------------------- // __split_buffer -static_assert(std::__is_replaceable >::value, ""); 
-static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable > >::value, - ""); -static_assert(!std::__is_replaceable > >::value, - ""); -static_assert(std::__is_replaceable > >::value, +static_assert( + std::__is_replaceable, std::__split_buffer_pointer_layout> >::value, + ""); +static_assert(std::__is_replaceable, + std::__split_buffer_pointer_layout> >::value, ""); -static_assert(std::__is_replaceable > >::value, +static_assert( + !std::__is_replaceable< + std::__split_buffer, std::__split_buffer_pointer_layout > >:: + value, + ""); +static_assert( + !std::__is_replaceable< + std::__split_buffer, std::__split_buffer_pointer_layout > >:: + value, + ""); +static_assert( + std::__is_replaceable< + std::__split_buffer, std::__split_buffer_pointer_layout > >:: + value, + ""); +static_assert( + std::__is_replaceable< + std::__split_buffer, std::__split_buffer_pointer_layout > >:: + value, + ""); + +static_assert( + std::__is_replaceable, std::__split_buffer_size_layout> >::value, ""); +static_assert(std::__is_replaceable, + std::__split_buffer_size_layout> >::value, ""); +static_assert( + !std::__is_replaceable< + std::__split_buffer, std::__split_buffer_size_layout > >::value, + ""); +static_assert( + !std::__is_replaceable< + std::__split_buffer, std::__split_buffer_size_layout > >::value, + ""); +static_assert( + std::__is_replaceable< + std::__split_buffer, std::__split_buffer_size_layout > >:: + value, + ""); +static_assert( + std::__is_replaceable< + std::__split_buffer, std::__split_buffer_size_layout > >:: + value, + ""); // standard library types // ---------------------- diff --git a/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp b/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp index c462672616f77..10889eb50870d 100644 --- a/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp +++ b/libcxx/test/libcxx/type_traits/is_trivially_relocatable.compile.pass.cpp @@ 
-68,9 +68,27 @@ static_assert(!std::__libcpp_is_trivially_relocatable::val // ---------------------- // __split_buffer -static_assert(std::__libcpp_is_trivially_relocatable >::value, ""); -static_assert(std::__libcpp_is_trivially_relocatable >::value, ""); -static_assert(!std::__libcpp_is_trivially_relocatable > >::value, ""); +static_assert(std::__libcpp_is_trivially_relocatable< + std::__split_buffer, std::__split_buffer_pointer_layout> >::value, + ""); +static_assert(std::__libcpp_is_trivially_relocatable, + std::__split_buffer_pointer_layout> >::value, + ""); +static_assert(!std::__libcpp_is_trivially_relocatable< + std::__split_buffer, std::__split_buffer_pointer_layout > >::value, + ""); + +static_assert(std::__libcpp_is_trivially_relocatable< + std::__split_buffer, std::__split_buffer_size_layout> >::value, + ""); +static_assert(std::__libcpp_is_trivially_relocatable, + std::__split_buffer_size_layout> >::value, + ""); +static_assert(!std::__libcpp_is_trivially_relocatable< + std::__split_buffer, std::__split_buffer_size_layout > >::value, + ""); // standard library types // ---------------------- diff --git a/lldb/examples/synthetic/libcxx.py b/lldb/examples/synthetic/libcxx.py index 5abeb3061f4f5..549255e280c1d 100644 --- a/lldb/examples/synthetic/libcxx.py +++ b/lldb/examples/synthetic/libcxx.py @@ -1,3 +1,6 @@ +from enum import Enum +from sys import stderr +import sys import lldb import lldb.formatters.Logger @@ -74,6 +77,59 @@ def stdstring_SummaryProvider(valobj, dict): return '"' + strval + '"' +def get_buffer_end(buffer, begin): + """ + Returns a pointer to where the next element would be pushed. + + For libc++'s stable ABI and unstable < LLVM 22, returns `__end_`. + For libc++'s unstable ABI, returns `__begin_ + __size_`. 
+ """ + map_end = buffer.GetChildMemberWithName("__end_") + if map_end.IsValid(): + return map_end.GetValueAsUnsigned(0) + map_size = buffer.GetChildMemberWithName("__size_").GetValueAsUnsigned(0) + return begin + map_size + + +def get_buffer_endcap(parent, buffer, begin, has_compressed_pair_layout, is_size_based): + """ + Returns a pointer to the end of the buffer. + + For libc++'s stable ABI and unstable < LLVM 22, returns: + * `__end_cap_`, if `__compressed_pair` is being used + * `__cap_`, otherwise + For libc++'s unstable ABI, returns `__begin_ + __cap_`. + """ + if has_compressed_pair_layout: + map_endcap = parent._get_value_of_compressed_pair( + buffer.GetChildMemberWithName("__end_cap_") + ) + elif buffer.GetType().GetNumberOfDirectBaseClasses() == 1: + # LLVM 22's __split_buffer is derived from a base class that describes its layout. When the + # compressed pair ABI is required, we also use an anonymous struct. Per [#158131], LLDB + # is unable to access members of an anonymous struct to a base class, through the derived + # class. This means that in order to access the compressed pair's pointer, we need to first + # get to its base class. 
+ # + # [#158131]: https://github.com/llvm/llvm-project/issues/158131 + buffer = buffer.GetChildAtIndex(0) + if is_size_based: + map_endcap = buffer.GetChildMemberWithName("__cap_") + else: + map_endcap = buffer.GetChildMemberWithName("__back_cap_") + map_endcap = map_endcap.GetValueAsUnsigned(0) + else: + map_endcap = buffer.GetChildMemberWithName("__cap_") + if not map_endcap.IsValid(): + map_endcap = buffer.GetChildMemberWithName("__end_cap_") + map_endcap = map_endcap.GetValueAsUnsigned(0) + + if is_size_based: + return begin + map_endcap + + return map_endcap + + class stdvector_SynthProvider: def __init__(self, valobj, dict): logger = lldb.formatters.Logger.Logger() @@ -755,23 +811,21 @@ def update(self): if self.block_size < 0: logger.write("block_size < 0") return - map_ = self.valobj.GetChildMemberWithName("__map_") start = self.valobj.GetChildMemberWithName("__start_").GetValueAsUnsigned(0) + + map_ = self.valobj.GetChildMemberWithName("__map_") + is_size_based = map_.GetChildMemberWithName("__size_").IsValid() first = map_.GetChildMemberWithName("__first_") + # LLVM 22 renames __map_.__begin_ to __map_.__front_cap_ + if not first: + first = map_.GetChildMemberWithName("__front_cap_") map_first = first.GetValueAsUnsigned(0) self.map_begin = map_.GetChildMemberWithName("__begin_") map_begin = self.map_begin.GetValueAsUnsigned(0) - map_end = map_.GetChildMemberWithName("__end_").GetValueAsUnsigned(0) - - if has_compressed_pair_layout: - map_endcap = self._get_value_of_compressed_pair( - map_.GetChildMemberWithName("__end_cap_") - ) - else: - map_endcap = map_.GetChildMemberWithName("__cap_") - if not map_endcap.IsValid(): - map_endcap = map_.GetChildMemberWithName("__end_cap_") - map_endcap = map_endcap.GetValueAsUnsigned(0) + map_end = get_buffer_end(map_, map_begin) + map_endcap = get_buffer_endcap( + self, map_, map_begin, has_compressed_pair_layout, is_size_based + ) # check consistency if not map_first <= map_begin <= map_end <= map_endcap: From 
13547a9a777790ea05058e37c63b134e425fc8c2 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 12 Sep 2025 08:39:16 -0500 Subject: [PATCH 123/734] =?UTF-8?q?[flang][OpenMP]=20Turn=20IsStrictlyStru?= =?UTF-8?q?cturedBlock=20into=20utility=20function,=E2=80=A6=20(#158111)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … NFC --- flang/include/flang/Parser/openmp-utils.h | 2 ++ flang/include/flang/Semantics/openmp-utils.h | 1 + flang/lib/Parser/openmp-parsers.cpp | 14 +++---------- flang/lib/Parser/openmp-utils.cpp | 13 ++++++++++++ flang/lib/Semantics/check-omp-structure.cpp | 18 ++--------------- flang/lib/Semantics/openmp-utils.cpp | 21 +++++++++++++------- 6 files changed, 35 insertions(+), 34 deletions(-) diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h index 3d3dfae290d96..8205d25647916 100644 --- a/flang/include/flang/Parser/openmp-utils.h +++ b/flang/include/flang/Parser/openmp-utils.h @@ -155,6 +155,8 @@ template OmpDirectiveName GetOmpDirectiveName(const T &x) { } const OmpObjectList *GetOmpObjectList(const OmpClause &clause); +const BlockConstruct *GetFortranBlockConstruct( + const ExecutionPartConstruct &epc); } // namespace Fortran::parser::omp diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h index 1c54124a5738a..68318d6093a1e 100644 --- a/flang/include/flang/Semantics/openmp-utils.h +++ b/flang/include/flang/Semantics/openmp-utils.h @@ -83,6 +83,7 @@ const SomeExpr *HasStorageOverlap( bool IsAssignment(const parser::ActionStmt *x); bool IsPointerAssignment(const evaluate::Assignment &x); const parser::Block &GetInnermostExecPart(const parser::Block &block); +bool IsStrictlyStructuredBlock(const parser::Block &block); } // namespace omp } // namespace Fortran::semantics diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 68e0acdf91fe2..78a5746bb0bf8 100644 --- 
a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -15,6 +15,7 @@ #include "stmt-parser.h" #include "token-parsers.h" #include "type-parser-implementation.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -24,6 +25,7 @@ // OpenMP Directives and Clauses namespace Fortran::parser { +using namespace Fortran::parser::omp; // Helper function to print the buffer contents starting at the current point. [[maybe_unused]] static std::string ahead(const ParseState &state) { @@ -1280,16 +1282,6 @@ TYPE_PARSER(sourced( maybe(Parser{}), pure(OmpDirectiveSpecification::Flags::None)))) -static bool IsFortranBlockConstruct(const ExecutionPartConstruct &epc) { - // ExecutionPartConstruct -> ExecutableConstruct - // -> Indirection - if (auto *ec{std::get_if(&epc.u)}) { - return std::holds_alternative>(ec->u); - } else { - return false; - } -} - static bool IsStandaloneOrdered(const OmpDirectiveSpecification &dirSpec) { // An ORDERED construct is standalone if it has DOACROSS or DEPEND clause. return dirSpec.DirId() == llvm::omp::Directive::OMPD_ordered && @@ -1307,7 +1299,7 @@ struct StrictlyStructuredBlockParser { // Detect BLOCK construct without parsing the entire thing. 
if (lookAhead(skipStuffBeforeStatement >> "BLOCK"_tok).Parse(state)) { if (auto epc{Parser{}.Parse(state)}) { - if (IsFortranBlockConstruct(*epc)) { + if (GetFortranBlockConstruct(*epc) != nullptr) { Block body; body.emplace_back(std::move(*epc)); return std::move(body); diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp index ef7e4fcdbbd07..937a17f29f221 100644 --- a/flang/lib/Parser/openmp-utils.cpp +++ b/flang/lib/Parser/openmp-utils.cpp @@ -12,6 +12,7 @@ #include "flang/Parser/openmp-utils.h" +#include "flang/Common/indirection.h" #include "flang/Common/template.h" #include "flang/Common/visit.h" @@ -61,4 +62,16 @@ const OmpObjectList *GetOmpObjectList(const OmpClause &clause) { clause.u); } +const BlockConstruct *GetFortranBlockConstruct( + const ExecutionPartConstruct &epc) { + // ExecutionPartConstruct -> ExecutableConstruct + // -> Indirection + if (auto *ec{std::get_if(&epc.u)}) { + if (auto *ind{std::get_if>(&ec->u)}) { + return &ind->value(); + } + } + return nullptr; +} + } // namespace Fortran::parser::omp diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index d1654a3adcc9c..b7f72756c9530 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -874,22 +874,8 @@ void OmpStructureChecker::Enter(const parser::OmpBlockConstruct &x) { // Missing mandatory end block: this is checked in semantics because that // makes it easier to control the error messages. // The end block is mandatory when the construct is not applied to a strictly - // structured block (aka it is applied to a loosely structured block). In - // other words, the body doesn't contain exactly one parser::BlockConstruct. 
- auto isStrictlyStructuredBlock{[](const parser::Block &block) -> bool { - if (block.size() != 1) { - return false; - } - const parser::ExecutionPartConstruct &contents{block.front()}; - auto *executableConstruct{ - std::get_if(&contents.u)}; - if (!executableConstruct) { - return false; - } - return std::holds_alternative>( - executableConstruct->u); - }}; - if (!endSpec && !isStrictlyStructuredBlock(block)) { + // structured block (aka it is applied to a loosely structured block). + if (!endSpec && !IsStrictlyStructuredBlock(block)) { llvm::omp::Directive dirId{beginSpec.DirId()}; auto &msg{context_.Say(beginSpec.source, "Expected OpenMP END %s directive"_err_en_US, diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index e8df346ccdc3e..2980f827d3ef3 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -21,6 +21,7 @@ #include "flang/Evaluate/traverse.h" #include "flang/Evaluate/type.h" #include "flang/Evaluate/variable.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/expression.h" #include "flang/Semantics/semantics.h" @@ -37,6 +38,7 @@ #include namespace Fortran::semantics::omp { +using namespace Fortran::parser::omp; SourcedActionStmt GetActionStmt(const parser::ExecutionPartConstruct *x) { if (x == nullptr) { @@ -397,16 +399,21 @@ const parser::Block &GetInnermostExecPart(const parser::Block &block) { const parser::Block *iter{&block}; while (iter->size() == 1) { const parser::ExecutionPartConstruct &ep{iter->front()}; - if (auto *exec{std::get_if(&ep.u)}) { - using BlockConstruct = common::Indirection; - if (auto *bc{std::get_if(&exec->u)}) { - iter = &std::get(bc->value().t); - continue; - } + if (auto *bc{GetFortranBlockConstruct(ep)}) { + iter = &std::get(bc->t); + } else { + break; } - break; } return *iter; } +bool IsStrictlyStructuredBlock(const parser::Block &block) { + if (block.size() == 1) { + return 
GetFortranBlockConstruct(block.front()) != nullptr; + } else { + return false; + } +} + } // namespace Fortran::semantics::omp From cdd54ff927e0207a3e65bade576e3c2511112953 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 12 Sep 2025 08:40:00 -0500 Subject: [PATCH 124/734] [flang][OpenMP] Frontend support for REPLAYABLE and TRANSPARENT clauses (#158149) Parsing and semantic checks. --- flang/include/flang/Lower/OpenMP/Clauses.h | 2 + flang/include/flang/Parser/dump-parse-tree.h | 2 + flang/include/flang/Parser/parse-tree.h | 16 ++++ flang/lib/Lower/OpenMP/Clauses.cpp | 4 +- flang/lib/Parser/openmp-parsers.cpp | 12 +++ flang/lib/Semantics/check-omp-structure.cpp | 2 + .../test/Parser/OpenMP/replayable-clause.f90 | 60 +++++++++++++++ .../test/Parser/OpenMP/transparent-clause.f90 | 76 +++++++++++++++++++ .../Semantics/OpenMP/replayable-clause.f90 | 22 ++++++ .../Semantics/OpenMP/transparent-clause.f90 | 19 +++++ llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 15 +++- llvm/include/llvm/Frontend/OpenMP/OMP.td | 20 +++++ 12 files changed, 248 insertions(+), 2 deletions(-) create mode 100644 flang/test/Parser/OpenMP/replayable-clause.f90 create mode 100644 flang/test/Parser/OpenMP/transparent-clause.f90 create mode 100644 flang/test/Semantics/OpenMP/replayable-clause.f90 create mode 100644 flang/test/Semantics/OpenMP/transparent-clause.f90 diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h index 638846835094c..18e2f209c2d7a 100644 --- a/flang/include/flang/Lower/OpenMP/Clauses.h +++ b/flang/include/flang/Lower/OpenMP/Clauses.h @@ -277,6 +277,7 @@ using Read = tomp::clause::ReadT; using Reduction = tomp::clause::ReductionT; using Relaxed = tomp::clause::RelaxedT; using Release = tomp::clause::ReleaseT; +using Replayable = tomp::clause::ReplayableT; using ReverseOffload = tomp::clause::ReverseOffloadT; using Safelen = tomp::clause::SafelenT; using Schedule = tomp::clause::ScheduleT; @@ -290,6 +291,7 @@ using 
Permutation = tomp::clause::PermutationT; using TaskReduction = tomp::clause::TaskReductionT; using ThreadLimit = tomp::clause::ThreadLimitT; using Threads = tomp::clause::ThreadsT; +using Transparent = tomp::clause::TransparentT; using To = tomp::clause::ToT; using UnifiedAddress = tomp::clause::UnifiedAddressT; using UnifiedSharedMemory = diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index d2ab7cbd8fe35..1c9fd7673e06d 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -658,6 +658,7 @@ class ParseTreeDumper { NODE(parser, OmpReductionSpecifier) NODE(parser, OmpRefModifier) NODE_ENUM(OmpRefModifier, Value) + NODE(parser, OmpReplayableClause) NODE(parser, OmpScheduleClause) NODE(OmpScheduleClause, Modifier) NODE_ENUM(OmpScheduleClause, Kind) @@ -686,6 +687,7 @@ class ParseTreeDumper { NODE(parser, OmpTraitSetSelector) NODE(parser, OmpTraitSetSelectorName) NODE_ENUM(OmpTraitSetSelectorName, Value) + NODE(parser, OmpTransparentClause) NODE(parser, OmpTypeNameList) NODE(parser, OmpTypeSpecifier) NODE(parser, OmpUpdateClause) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 622b5f90a9fba..951c96b974141 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4643,6 +4643,14 @@ struct OmpReductionClause { std::tuple t; }; +// Ref: [6.0:440:441] +// +// replayable-clause -> +// REPLAYABLE[(replayable-expression)] // since 6.0 +struct OmpReplayableClause { + WRAPPER_CLASS_BOILERPLATE(OmpReplayableClause, Scalar>); +}; + // Ref: [4.5:56-63], [5.0:101-109], [5.1:126-133], [5.2:252-254] // // schedule-clause -> @@ -4692,6 +4700,14 @@ struct OmpToClause { std::tuple t; }; +// Ref: [6.0:510-511] +// +// transparent-clause -> +// TRANSPARENT[(impex-type)] // since 6.0 +struct OmpTransparentClause { + WRAPPER_CLASS_BOILERPLATE(OmpTransparentClause, ScalarIntExpr); 
+}; + // Ref: [5.0:254-255], [5.1:287-288], [5.2:321-322] // // In ATOMIC construct diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index cecc1a9395892..78fe5aa031ba1 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -239,11 +239,11 @@ MAKE_EMPTY_CLASS(Relaxed, Relaxed); MAKE_EMPTY_CLASS(Release, Release); MAKE_EMPTY_CLASS(ReverseOffload, ReverseOffload); MAKE_EMPTY_CLASS(SeqCst, SeqCst); +MAKE_EMPTY_CLASS(SelfMaps, SelfMaps); MAKE_EMPTY_CLASS(Simd, Simd); MAKE_EMPTY_CLASS(Threads, Threads); MAKE_EMPTY_CLASS(UnifiedAddress, UnifiedAddress); MAKE_EMPTY_CLASS(UnifiedSharedMemory, UnifiedSharedMemory); -MAKE_EMPTY_CLASS(SelfMaps, SelfMaps); MAKE_EMPTY_CLASS(Unknown, Unknown); MAKE_EMPTY_CLASS(Untied, Untied); MAKE_EMPTY_CLASS(Weak, Weak); @@ -257,6 +257,8 @@ MAKE_EMPTY_CLASS(Threadprivate, Threadprivate); MAKE_INCOMPLETE_CLASS(AdjustArgs, AdjustArgs); MAKE_INCOMPLETE_CLASS(AppendArgs, AppendArgs); +MAKE_INCOMPLETE_CLASS(Replayable, Replayable); +MAKE_INCOMPLETE_CLASS(Transparent, Transparent); List makeIteratorSpecifiers(const parser::OmpIteratorSpecifier &inp, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 78a5746bb0bf8..519bce64321d4 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -36,6 +36,9 @@ using namespace Fortran::parser::omp; constexpr auto startOmpLine = skipStuffBeforeStatement >> "!$OMP "_sptok; constexpr auto endOmpLine = space >> endOfLine; +constexpr auto logicalConstantExpr{logical(constantExpr)}; +constexpr auto scalarLogicalConstantExpr{scalar(logicalConstantExpr)}; + // Given a parser for a single element, and a parser for a list of elements // of the same type, create a parser that constructs the entire list by having // the single element be the head of the list, and the rest be the tail. 
@@ -870,6 +873,8 @@ TYPE_PARSER(construct( maybe(nonemptyList(Parser{}) / ":"), Parser{})) +TYPE_PARSER(construct(scalarLogicalConstantExpr)) + // OMP 5.0 2.19.5.6 IN_REDUCTION (reduction-identifier: variable-name-list) TYPE_PARSER(construct( maybe(nonemptyList(Parser{}) / ":"), @@ -879,6 +884,8 @@ TYPE_PARSER(construct( maybe(nonemptyList(Parser{}) / ":"), Parser{})) +TYPE_PARSER(construct(scalarIntExpr)) + // OMP 5.0 2.11.4 allocate-clause -> ALLOCATE ([allocator:] variable-name-list) // OMP 5.2 2.13.4 allocate-clause -> ALLOCATE ([allocate-modifier // [, allocate-modifier] :] @@ -1194,6 +1201,8 @@ TYPE_PARSER( // "READ" >> construct(construct()) || "RELAXED" >> construct(construct()) || "RELEASE" >> construct(construct()) || + "REPLAYABLE" >> construct(construct( + maybe(parenthesized(Parser{})))) || "REVERSE_OFFLOAD" >> construct(construct()) || "SAFELEN" >> construct(construct( @@ -1217,6 +1226,9 @@ TYPE_PARSER( // parenthesized(scalarIntExpr))) || "TO" >> construct(construct( parenthesized(Parser{}))) || + "TRANSPARENT" >> + construct(construct( + maybe(parenthesized(Parser{})))) || "USE" >> construct(construct( parenthesized(Parser{}))) || "USE_DEVICE_PTR" >> construct(construct( diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index b7f72756c9530..6bc9f9955fe24 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2831,6 +2831,8 @@ CHECK_SIMPLE_CLAUSE(AcqRel, OMPC_acq_rel) CHECK_SIMPLE_CLAUSE(Acquire, OMPC_acquire) CHECK_SIMPLE_CLAUSE(Relaxed, OMPC_relaxed) CHECK_SIMPLE_CLAUSE(Release, OMPC_release) +CHECK_SIMPLE_CLAUSE(Replayable, OMPC_replayable) +CHECK_SIMPLE_CLAUSE(Transparent, OMPC_transparent) CHECK_SIMPLE_CLAUSE(SeqCst, OMPC_seq_cst) CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail) diff --git a/flang/test/Parser/OpenMP/replayable-clause.f90 b/flang/test/Parser/OpenMP/replayable-clause.f90 new file mode 100644 index 0000000000000..c1733449fcb70 --- 
/dev/null +++ b/flang/test/Parser/OpenMP/replayable-clause.f90 @@ -0,0 +1,60 @@ +!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s +!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine f00 + !$omp task replayable + block + end block +end + +!UNPARSE: SUBROUTINE f00 +!UNPARSE: !$OMP TASK REPLAYABLE +!UNPARSE: BLOCK +!UNPARSE: END BLOCK +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = task +!PARSE-TREE: | | OmpClauseList -> OmpClause -> Replayable -> +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block + + +subroutine f01(x) + implicit none + integer :: x + !$omp target_update to(x) replayable(.true.) +end + +!UNPARSE: SUBROUTINE f01 (x) +!UNPARSE: IMPLICIT NONE +!UNPARSE: INTEGER x +!UNPARSE: !$OMP TARGET_UPDATE TO(x) REPLAYABLE(.true._4) +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target update +!PARSE-TREE: | OmpClauseList -> OmpClause -> To -> OmpToClause +!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | bool = 'true' +!PARSE-TREE: | OmpClause -> Replayable -> OmpReplayableClause -> Scalar -> Logical -> Constant -> Expr = '.true._4' +!PARSE-TREE: | | LiteralConstant -> LogicalLiteralConstant +!PARSE-TREE: | | | bool = 'true' +!PARSE-TREE: | Flags = None + + +subroutine f02 + !$omp taskwait replayable(.false.) 
+end + +!UNPARSE: SUBROUTINE f02 +!UNPARSE: !$OMP TASKWAIT REPLAYABLE(.false._4) +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = taskwait +!PARSE-TREE: | OmpClauseList -> OmpClause -> Replayable -> OmpReplayableClause -> Scalar -> Logical -> Constant -> Expr = '.false._4' +!PARSE-TREE: | | LiteralConstant -> LogicalLiteralConstant +!PARSE-TREE: | | | bool = 'false' +!PARSE-TREE: | Flags = None diff --git a/flang/test/Parser/OpenMP/transparent-clause.f90 b/flang/test/Parser/OpenMP/transparent-clause.f90 new file mode 100644 index 0000000000000..01f49f5e8a15d --- /dev/null +++ b/flang/test/Parser/OpenMP/transparent-clause.f90 @@ -0,0 +1,76 @@ +!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s +!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine f00(x) + implicit none + integer :: x + !$omp target_data map(to: x) transparent + block + end block +end + +!UNPARSE: SUBROUTINE f00 (x) +!UNPARSE: IMPLICIT NONE +!UNPARSE: INTEGER x +!UNPARSE: !$OMP TARGET_DATA MAP(TO: x) TRANSPARENT +!UNPARSE: BLOCK +!UNPARSE: END BLOCK +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target data +!PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause +!PARSE-TREE: | | | Modifier -> OmpMapType -> Value = To +!PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | | bool = 'true' +!PARSE-TREE: | | OmpClause -> Transparent -> +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block + + +subroutine f01 + !$omp task 
transparent(0) + !$omp end task +end + +!UNPARSE: SUBROUTINE f01 +!UNPARSE: !$OMP TASK TRANSPARENT(0_4) +!UNPARSE: !$OMP END TASK +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = task +!PARSE-TREE: | | OmpClauseList -> OmpClause -> Transparent -> OmpTransparentClause -> Scalar -> Integer -> Expr = '0_4' +!PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = task +!PARSE-TREE: | | OmpClauseList -> +!PARSE-TREE: | | Flags = None + + +subroutine f02 + implicit none + integer :: i + !$omp taskloop transparent(2) + do i = 1, 10 + end do +end + +!UNPARSE: SUBROUTINE f02 +!UNPARSE: IMPLICIT NONE +!UNPARSE: INTEGER i +!UNPARSE: !$OMP TASKLOOP TRANSPARENT(2_4) +!UNPARSE: DO i=1_4,10_4 +!UNPARSE: END DO +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE: | OmpBeginLoopDirective +!PARSE-TREE: | | OmpLoopDirective -> llvm::omp::Directive = taskloop +!PARSE-TREE: | | OmpClauseList -> OmpClause -> Transparent -> OmpTransparentClause -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '2' +!PARSE-TREE: | DoConstruct diff --git a/flang/test/Semantics/OpenMP/replayable-clause.f90 b/flang/test/Semantics/OpenMP/replayable-clause.f90 new file mode 100644 index 0000000000000..b8fe6cea23a6f --- /dev/null +++ b/flang/test/Semantics/OpenMP/replayable-clause.f90 @@ -0,0 +1,22 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60 + +subroutine f00(x) + implicit none + logical :: x + !ERROR: Must be a constant value + !$omp task replayable(x) + !$omp end task +end + +subroutine f01 + !ERROR: Must have LOGICAL 
type, but is INTEGER(4) + !$omp task replayable(7) + !$omp end task +end + +subroutine f02 + !No diagnostic expected + !$omp task replayable + !$omp end task +end + diff --git a/flang/test/Semantics/OpenMP/transparent-clause.f90 b/flang/test/Semantics/OpenMP/transparent-clause.f90 new file mode 100644 index 0000000000000..4831ba0f7cef6 --- /dev/null +++ b/flang/test/Semantics/OpenMP/transparent-clause.f90 @@ -0,0 +1,19 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60 + +subroutine f00(x) + integer :: x(10) + !ERROR: Must be a scalar value, but is a rank-1 array + !$omp task transparent(x) + !$omp end task +end + +subroutine f01 + implicit none + integer :: i + !ERROR: Must have INTEGER type, but is CHARACTER(KIND=1,LEN=5_8) + !$omp taskloop transparent("hello") + do i = 1, 10 + end do + !$omp end taskloop +end + diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 56905854f9baa..1ed23eed1571d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -1046,6 +1046,12 @@ struct ReleaseT { using EmptyTrait = std::true_type; }; +// [6.0:440-441] `replayable` clause +template // +struct ReplayableT { + using IncompleteTrait = std::true_type; +}; + // V5.2: [8.2.1] `requirement` clauses template // struct ReverseOffloadT { @@ -1153,6 +1159,12 @@ struct ToT { std::tuple t; }; +// [6.0:440-441] `transparent` clause +template // +struct TransparentT { + using IncompleteTrait = std::true_type; +}; + // V5.2: [8.2.1] `requirement` clauses template // struct UnifiedAddressT { @@ -1279,7 +1291,8 @@ using EmptyClausesT = std::variant< template using IncompleteClausesT = std::variant, AppendArgsT, MatchT, - OtherwiseT, WhenT>; + OtherwiseT, ReplayableT, + TransparentT, WhenT>; template using TupleClausesT = diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index ce136197dd0d7..6a41c24e78149 100644 
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -456,6 +456,10 @@ def OMPC_Relaxed : Clause<[Spelling<"relaxed">]> { def OMPC_Release : Clause<[Spelling<"release">]> { let clangClass = "OMPReleaseClause"; } +def OMPC_Replayable : Clause<[Spelling<"replayable">]> { + let flangClass = "OmpReplayableClause"; + let isValueOptional = true; +} def OMPC_ReverseOffload : Clause<[Spelling<"reverse_offload">]> { let clangClass = "OMPReverseOffloadClause"; } @@ -523,6 +527,10 @@ def OMPC_To : Clause<[Spelling<"to">]> { let clangClass = "OMPToClause"; let flangClass = "OmpToClause"; } +def OMPC_Transparent : Clause<[Spelling<"transparent">]> { + let flangClass = "OmpTransparentClause"; + let isValueOptional = true; +} def OMPC_UnifiedAddress : Clause<[Spelling<"unified_address">]> { let clangClass = "OMPUnifiedAddressClause"; } @@ -1128,6 +1136,7 @@ def OMP_Target : Directive<[Spelling<"target">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, ]; let association = AS_Block; @@ -1139,6 +1148,7 @@ def OMP_TargetData : Directive<[Spelling<"target data", 1, 52>, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let requiredClauses = [ VersionedClause, @@ -1157,6 +1167,7 @@ def OMP_TargetEnterData : Directive<[Spelling<"target enter data", 1, 52>, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let requiredClauses = [ VersionedClause, @@ -1173,6 +1184,7 @@ def OMP_TargetExitData : Directive<[Spelling<"target exit data", 1, 52>, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let requiredClauses = [ VersionedClause, @@ -1191,6 +1203,7 @@ def OMP_TargetUpdate : Directive<[Spelling<"target update", 1, 52>, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_None; let category = CA_Executable; @@ -1213,6 +1226,8 @@ def OMP_Task : Directive<[Spelling<"task">]> { VersionedClause, 
VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, ]; let association = AS_Block; let category = CA_Executable; @@ -1254,6 +1269,8 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, ]; let allowedExclusiveClauses = [ VersionedClause, @@ -1267,6 +1284,9 @@ def OMP_TaskWait : Directive<[Spelling<"taskwait">]> { VersionedClause, VersionedClause, ]; + let allowedOnceClauses = [ + VersionedClause, + ]; let association = AS_None; let category = CA_Executable; } From b39da343a2cb958016d37081e3fc3b0cfe5cb2ab Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 12 Sep 2025 14:51:18 +0100 Subject: [PATCH 125/734] Revert "[mlir][Transforms] Fix crash in `reconcile-unrealized-casts`" (#158295) Reverts llvm/llvm-project#158067 Buildbot is broken. --- .../mlir/Transforms/DialectConversion.h | 3 - .../Transforms/Utils/DialectConversion.cpp | 151 +++++------------- .../reconcile-unrealized-casts.mlir | 50 ------ ...assume-alignment-runtime-verification.mlir | 3 +- .../atomic-rmw-runtime-verification.mlir | 3 +- .../MemRef/store-runtime-verification.mlir | 3 +- 6 files changed, 42 insertions(+), 171 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index f8caae3ce9995..a096f82a4cfd8 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -1428,9 +1428,6 @@ struct ConversionConfig { /// /// In the above example, %0 can be used instead of %3 and all cast ops are /// folded away. 
-void reconcileUnrealizedCasts( - const DenseSet &castOps, - SmallVectorImpl *remainingCastOps = nullptr); void reconcileUnrealizedCasts( ArrayRef castOps, SmallVectorImpl *remainingCastOps = nullptr); diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index d53e1e78f2027..df9700f11200f 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -3100,7 +3100,6 @@ unsigned OperationLegalizer::applyCostModelToPatterns( //===----------------------------------------------------------------------===// // OperationConverter //===----------------------------------------------------------------------===// - namespace { enum OpConversionMode { /// In this mode, the conversion will ignore failed conversions to allow @@ -3118,13 +3117,6 @@ enum OpConversionMode { } // namespace namespace mlir { - -// Predeclaration only. -static void reconcileUnrealizedCasts( - const DenseMap - &castOps, - SmallVectorImpl *remainingCastOps); - // This class converts operations to a given conversion target via a set of // rewrite patterns. The conversion behaves differently depending on the // conversion mode. @@ -3272,13 +3264,18 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { // After a successful conversion, apply rewrites. rewriterImpl.applyRewrites(); - // Reconcile all UnrealizedConversionCastOps that were inserted by the - // dialect conversion frameworks. (Not the ones that were inserted by - // patterns.) + // Gather all unresolved materializations. + SmallVector allCastOps; const DenseMap &materializations = rewriterImpl.unresolvedMaterializations; + for (auto it : materializations) + allCastOps.push_back(it.first); + + // Reconcile all UnrealizedConversionCastOps that were inserted by the + // dialect conversion frameworks. (Not the one that were inserted by + // patterns.) 
SmallVector remainingCastOps; - reconcileUnrealizedCasts(materializations, &remainingCastOps); + reconcileUnrealizedCasts(allCastOps, &remainingCastOps); // Drop markers. for (UnrealizedConversionCastOp castOp : remainingCastOps) @@ -3306,19 +3303,20 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { // Reconcile Unrealized Casts //===----------------------------------------------------------------------===// -/// Try to reconcile all given UnrealizedConversionCastOps and store the -/// left-over ops in `remainingCastOps` (if provided). See documentation in -/// DialectConversion.h for more details. -/// The `isCastOpOfInterestFn` is used to filter the cast ops to proceed: the -/// algorithm may visit an operand (or user) which is a cast op, but will not -/// try to reconcile it if not in the filtered set. -template -static void reconcileUnrealizedCastsImpl( - RangeT castOps, - function_ref isCastOpOfInterestFn, +void mlir::reconcileUnrealizedCasts( + ArrayRef castOps, SmallVectorImpl *remainingCastOps) { - // A worklist of cast ops to process. SetVector worklist(llvm::from_range, castOps); + // This set is maintained only if `remainingCastOps` is provided. + DenseSet erasedOps; + + // Helper function that adds all operands to the worklist that are an + // unrealized_conversion_cast op result. + auto enqueueOperands = [&](UnrealizedConversionCastOp castOp) { + for (Value v : castOp.getInputs()) + if (auto inputCastOp = v.getDefiningOp()) + worklist.insert(inputCastOp); + }; // Helper function that return the unrealized_conversion_cast op that // defines all inputs of the given op (in the same order). Return "nullptr" @@ -3339,110 +3337,39 @@ static void reconcileUnrealizedCastsImpl( // Process ops in the worklist bottom-to-top. while (!worklist.empty()) { UnrealizedConversionCastOp castOp = worklist.pop_back_val(); + if (castOp->use_empty()) { + // DCE: If the op has no users, erase it. 
Add the operands to the + // worklist to find additional DCE opportunities. + enqueueOperands(castOp); + if (remainingCastOps) + erasedOps.insert(castOp.getOperation()); + castOp->erase(); + continue; + } // Traverse the chain of input cast ops to see if an op with the same // input types can be found. UnrealizedConversionCastOp nextCast = castOp; while (nextCast) { if (nextCast.getInputs().getTypes() == castOp.getResultTypes()) { - if (llvm::any_of(nextCast.getInputs(), [&](Value v) { - return v.getDefiningOp() == castOp; - })) { - // Ran into a cycle. - break; - } - // Found a cast where the input types match the output types of the - // matched op. We can directly use those inputs. + // matched op. We can directly use those inputs and the matched op can + // be removed. + enqueueOperands(castOp); castOp.replaceAllUsesWith(nextCast.getInputs()); + if (remainingCastOps) + erasedOps.insert(castOp.getOperation()); + castOp->erase(); break; } nextCast = getInputCast(nextCast); } } - // A set of all alive cast ops. I.e., ops whose results are (transitively) - // used by an op that is not a cast op. - DenseSet liveOps; - - // Helper function that marks the given op and transitively reachable input - // cast ops as alive. - auto markOpLive = [&](Operation *rootOp) { - SmallVector worklist; - worklist.push_back(rootOp); - while (!worklist.empty()) { - Operation *op = worklist.pop_back_val(); - if (liveOps.insert(op).second) { - // Successfully inserted: process reachable input cast ops. - for (Value v : op->getOperands()) - if (auto castOp = v.getDefiningOp()) - if (isCastOpOfInterestFn(castOp)) - worklist.push_back(castOp); - } - } - }; - - // Find all alive cast ops. - for (UnrealizedConversionCastOp op : castOps) { - // The op may have been marked live already as being an operand of another - // live cast op. - if (liveOps.contains(op.getOperation())) - continue; - // If any of the users is not a cast op, mark the current op (and its - // input ops) as live. 
- if (llvm::any_of(op->getUsers(), [&](Operation *user) { - auto castOp = dyn_cast(user); - return !castOp || !isCastOpOfInterestFn(castOp); - })) - markOpLive(op); - } - - // Erase all dead cast ops. - for (UnrealizedConversionCastOp op : castOps) { - if (liveOps.contains(op)) { - // Op is alive and was not erased. Add it to the remaining cast ops. - if (remainingCastOps) + if (remainingCastOps) + for (UnrealizedConversionCastOp op : castOps) + if (!erasedOps.contains(op.getOperation())) remainingCastOps->push_back(op); - continue; - } - - // Op is dead. Erase it. - op->dropAllUses(); - op->erase(); - } -} - -void mlir::reconcileUnrealizedCasts( - ArrayRef castOps, - SmallVectorImpl *remainingCastOps) { - // Set of all cast ops for faster lookups. - DenseSet castOpSet; - for (UnrealizedConversionCastOp op : castOps) - castOpSet.insert(op); - reconcileUnrealizedCasts(castOpSet, remainingCastOps); -} - -void mlir::reconcileUnrealizedCasts( - const DenseSet &castOps, - SmallVectorImpl *remainingCastOps) { - reconcileUnrealizedCastsImpl( - llvm::make_range(castOps.begin(), castOps.end()), - [&](UnrealizedConversionCastOp castOp) { - return castOps.contains(castOp); - }, - remainingCastOps); -} - -static void mlir::reconcileUnrealizedCasts( - const DenseMap - &castOps, - SmallVectorImpl *remainingCastOps) { - reconcileUnrealizedCastsImpl( - castOps.keys(), - [&](UnrealizedConversionCastOp castOp) { - return castOps.contains(castOp); - }, - remainingCastOps); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir index ac5ca321c066f..3573114f5e038 100644 --- a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir +++ b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir @@ -194,53 +194,3 @@ func.func @emptyCast() -> index { 
%0 = builtin.unrealized_conversion_cast to index return %0 : index } - -// ----- - -// CHECK-LABEL: test.graph_region -// CHECK-NEXT: "test.return"() : () -> () -test.graph_region { - %0 = builtin.unrealized_conversion_cast %2 : i32 to i64 - %1 = builtin.unrealized_conversion_cast %0 : i64 to i16 - %2 = builtin.unrealized_conversion_cast %1 : i16 to i32 - "test.return"() : () -> () -} - -// ----- - -// CHECK-LABEL: test.graph_region -// CHECK-NEXT: %[[cast0:.*]] = builtin.unrealized_conversion_cast %[[cast2:.*]] : i32 to i64 -// CHECK-NEXT: %[[cast1:.*]] = builtin.unrealized_conversion_cast %[[cast0]] : i64 to i16 -// CHECK-NEXT: %[[cast2]] = builtin.unrealized_conversion_cast %[[cast1]] : i16 to i32 -// CHECK-NEXT: "test.user"(%[[cast2]]) : (i32) -> () -// CHECK-NEXT: "test.return"() : () -> () -test.graph_region { - %0 = builtin.unrealized_conversion_cast %2 : i32 to i64 - %1 = builtin.unrealized_conversion_cast %0 : i64 to i16 - %2 = builtin.unrealized_conversion_cast %1 : i16 to i32 - "test.user"(%2) : (i32) -> () - "test.return"() : () -> () -} - -// ----- - -// CHECK-LABEL: test.graph_region -// CHECK-NEXT: "test.return"() : () -> () -test.graph_region { - %0 = builtin.unrealized_conversion_cast %0 : i32 to i32 - "test.return"() : () -> () -} - -// ----- - -// CHECK-LABEL: test.graph_region -// CHECK-NEXT: %[[c0:.*]] = arith.constant -// CHECK-NEXT: %[[cast:.*]]:2 = builtin.unrealized_conversion_cast %[[c0]], %[[cast]]#1 : i32, i32 to i32, i32 -// CHECK-NEXT: "test.user"(%[[cast]]#0) : (i32) -> () -// CHECK-NEXT: "test.return"() : () -> () -test.graph_region { - %cst = arith.constant 0 : i32 - %0, %1 = builtin.unrealized_conversion_cast %cst, %1 : i32, i32 to i32, i32 - "test.user"(%0) : (i32) -> () - "test.return"() : () -> () -} diff --git a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir index 01a826a638606..25a338df8d790 100644 --- 
a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir @@ -1,8 +1,7 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -expand-strided-metadata \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm \ -// RUN: -reconcile-unrealized-casts | \ +// RUN: -convert-to-llvm | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir index 1144a7caf36e8..4c6a48d577a6c 100644 --- a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir @@ -1,7 +1,6 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm \ -// RUN: -reconcile-unrealized-casts | \ +// RUN: -convert-to-llvm | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir index 82e63805cd027..dd000c6904bcb 100644 --- a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir @@ -1,7 +1,6 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm \ -// RUN: -reconcile-unrealized-casts | \ +// RUN: -convert-to-llvm | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s From 1a746b6ca3862165360c48fff5d807d5b400b541 Mon Sep 17 00:00:00 2001 From: Davide Grohmann Date: Fri, 12 Sep 2025 15:51:35 
+0200 Subject: [PATCH 126/734] [mlir][spirv] Add support for SPV_ARM_graph extension - part 2 (#156665) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the second patch to add support for the `SPV_ARM_graph` SPIR-V extension to MLIR’s SPIR-V dialect. The extension introduces a new `Graph` abstraction for expressing dataflow computations over full resources. The part 2 implementation includes: - Serialization and deserialization support for: - `OpGraphARM`, `OpGraphInputARM`, `OpGraphSetOutputARM`, `OpGraphEndARM` - `OpGraphEntryPointARM`, `OpGraphConstantARM`, `OpTypeGraphARM` - Tests covering binary round-tripping. Graphs currently support only `SPV_ARM_tensors`, but are designed to generalize to other resource types, such as images. Spec: https://github.com/KhronosGroup/SPIRV-Registry/pull/346 RFC: https://discourse.llvm.org/t/rfc-add-support-for-spv-arm-graph-extension-in-mlir-spir-v-dialect/86947 --------- Signed-off-by: Davide Grohmann --- .../SPIRV/Deserialization/DeserializeOps.cpp | 22 ++ .../SPIRV/Deserialization/Deserializer.cpp | 287 ++++++++++++++++++ .../SPIRV/Deserialization/Deserializer.h | 51 +++- .../SPIRV/Serialization/SerializeOps.cpp | 122 ++++++++ .../Target/SPIRV/Serialization/Serializer.cpp | 80 ++++- .../Target/SPIRV/Serialization/Serializer.h | 39 ++- mlir/test/Target/SPIRV/graph-ops.mlir | 25 ++ 7 files changed, 619 insertions(+), 7 deletions(-) create mode 100644 mlir/test/Target/SPIRV/graph-ops.mlir diff --git a/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp index ee18cf815e4a7..c27f9aa91332c 100644 --- a/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp +++ b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp @@ -86,6 +86,13 @@ Value spirv::Deserializer::getValue(uint32_t id) { if (auto undef = getUndefType(id)) { return spirv::UndefOp::create(opBuilder, unknownLoc, undef); } + if (std::optional + 
graphConstantARMInfo = getGraphConstantARM(id)) { + IntegerAttr graphConstantID = graphConstantARMInfo->graphConstantID; + Type resultType = graphConstantARMInfo->resultType; + return spirv::GraphConstantARMOp::create(opBuilder, unknownLoc, resultType, + graphConstantID); + } return valueMap.lookup(id); } @@ -180,6 +187,7 @@ LogicalResult spirv::Deserializer::processInstruction( case spirv::Opcode::OpTypeStruct: case spirv::Opcode::OpTypePointer: case spirv::Opcode::OpTypeTensorARM: + case spirv::Opcode::OpTypeGraphARM: case spirv::Opcode::OpTypeCooperativeMatrixKHR: return processType(opcode, operands); case spirv::Opcode::OpTypeForwardPointer: @@ -208,12 +216,26 @@ LogicalResult spirv::Deserializer::processInstruction( return processConstantBool(/*isTrue=*/false, operands, /*isSpec=*/true); case spirv::Opcode::OpConstantNull: return processConstantNull(operands); + case spirv::Opcode::OpGraphConstantARM: + return processGraphConstantARM(operands); case spirv::Opcode::OpDecorate: return processDecoration(operands); case spirv::Opcode::OpMemberDecorate: return processMemberDecoration(operands); case spirv::Opcode::OpFunction: return processFunction(operands); + case spirv::Opcode::OpGraphEntryPointARM: + if (deferInstructions) { + deferredInstructions.emplace_back(opcode, operands); + return success(); + } + return processGraphEntryPointARM(operands); + case spirv::Opcode::OpGraphARM: + return processGraphARM(operands); + case spirv::Opcode::OpGraphSetOutputARM: + return processOpGraphSetOutputARM(operands); + case spirv::Opcode::OpGraphEndARM: + return processGraphEndARM(operands); case spirv::Opcode::OpLabel: return processLabel(operands); case spirv::Opcode::OpBranch: diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp index 3625dd2eb7dd3..0c3e87a8dc1ef 100644 --- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp @@ -669,6 
+669,200 @@ spirv::Deserializer::processFunctionEnd(ArrayRef operands) { return success(); } +LogicalResult +spirv::Deserializer::processGraphEntryPointARM(ArrayRef operands) { + if (operands.size() < 2) { + return emitError(unknownLoc, + "missing graph definition in OpGraphEntryPointARM"); + } + + unsigned wordIndex = 0; + uint32_t graphID = operands[wordIndex++]; + if (!graphMap.contains(graphID)) { + return emitError(unknownLoc, + "missing graph definition/declaration with id ") + << graphID; + } + + spirv::GraphARMOp graphARM = graphMap[graphID]; + StringRef name = decodeStringLiteral(operands, wordIndex); + graphARM.setSymName(name); + graphARM.setEntryPoint(true); + + SmallVector interface; + for (int64_t size = operands.size(); wordIndex < size; ++wordIndex) { + if (spirv::GlobalVariableOp arg = getGlobalVariable(operands[wordIndex])) { + interface.push_back(SymbolRefAttr::get(arg.getOperation())); + } else { + return emitError(unknownLoc, "undefined result ") + << operands[wordIndex] << " while decoding OpGraphEntryPoint"; + } + } + + // RAII guard to reset the insertion point to previous value when done. + OpBuilder::InsertionGuard insertionGuard(opBuilder); + opBuilder.setInsertionPoint(graphARM); + opBuilder.create( + unknownLoc, SymbolRefAttr::get(opBuilder.getContext(), name), + opBuilder.getArrayAttr(interface)); + + return success(); +} + +LogicalResult +spirv::Deserializer::processGraphARM(ArrayRef operands) { + if (curGraph) { + return emitError(unknownLoc, "found graph inside graph"); + } + // Get the result type.
+ if (operands.size() < 2) { + return emitError(unknownLoc, "OpGraphARM must have at least 2 parameters"); + } + + Type type = getType(operands[0]); + if (!type || !isa(type)) { + return emitError(unknownLoc, "unknown graph type from ") + << operands[0]; + } + auto graphType = cast(type); + if (graphType.getNumResults() <= 0) { + return emitError(unknownLoc, "expected at least one result"); + } + + uint32_t graphID = operands[1]; + if (graphMap.count(graphID)) { + return emitError(unknownLoc, "duplicate graph definition/declaration"); + } + + std::string graphName = getGraphSymbol(graphID); + auto graphOp = + opBuilder.create(unknownLoc, graphName, graphType); + curGraph = graphMap[graphID] = graphOp; + Block *entryBlock = graphOp.addEntryBlock(); + LLVM_DEBUG({ + logger.startLine() + << "//===-------------------------------------------===//\n"; + logger.startLine() << "[graph] name: " << graphName << "\n"; + logger.startLine() << "[graph] type: " << graphType << "\n"; + logger.startLine() << "[graph] ID: " << graphID << "\n"; + logger.startLine() << "[graph] entry block: " << entryBlock << "\n"; + logger.indent(); + }); + + // Parse the op argument instructions. 
+ for (auto [index, argType] : llvm::enumerate(graphType.getInputs())) { + spirv::Opcode opcode; + ArrayRef operands; + if (failed(sliceInstruction(opcode, operands, + spirv::Opcode::OpGraphInputARM))) { + return failure(); + } + if (operands.size() != 3) { + return emitError(unknownLoc, "expected result type, result and " + "input index for OpGraphInputARM"); + } + + Type argDefinedType = getType(operands[0]); + if (!argDefinedType) { + return emitError(unknownLoc, "unknown operand type ") << operands[0]; + } + + if (argDefinedType != argType) { + return emitError(unknownLoc, + "mismatch in argument type between graph type " + "definition ") + << graphType << " and argument type definition " << argDefinedType + << " at argument " << index; + } + if (getValue(operands[1])) { + return emitError(unknownLoc, "duplicate definition of result ") + << operands[1]; + } + + IntegerAttr inputIndexAttr = getConstantInt(operands[2]); + if (!inputIndexAttr) { + return emitError(unknownLoc, + "unable to read inputIndex value from constant op ") + << operands[2]; + } + BlockArgument argValue = graphOp.getArgument(inputIndexAttr.getInt()); + valueMap[operands[1]] = argValue; + } + + graphOutputs.resize(graphType.getNumResults()); + + // RAII guard to reset the insertion point to the module's region after + // deserializing the body of this function. + OpBuilder::InsertionGuard moduleInsertionGuard(opBuilder); + + blockMap[graphID] = entryBlock; + if (failed(createGraphBlock(graphID))) { + return failure(); + } + + // Process all the instructions in the graph until and including + // OpGraphEndARM. 
+ spirv::Opcode opcode; + ArrayRef instOperands; + do { + if (failed(sliceInstruction(opcode, instOperands, std::nullopt))) { + return failure(); + } + + if (failed(processInstruction(opcode, instOperands))) { + return failure(); + } + } while (opcode != spirv::Opcode::OpGraphEndARM); + + return success(); +} + +LogicalResult +spirv::Deserializer::processOpGraphSetOutputARM(ArrayRef operands) { + if (operands.size() != 2) { + return emitError( + unknownLoc, + "expected value id and output index for OpGraphSetOutputARM"); + } + + uint32_t id = operands[0]; + Value value = getValue(id); + if (!value) { + return emitError(unknownLoc, "could not find result ") << id; + } + + IntegerAttr outputIndexAttr = getConstantInt(operands[1]); + if (!outputIndexAttr) { + return emitError(unknownLoc, + "unable to read outputIndex value from constant op ") + << operands[1]; + } + graphOutputs[outputIndexAttr.getInt()] = value; + return success(); +} + +LogicalResult +spirv::Deserializer::processGraphEndARM(ArrayRef operands) { + // Create GraphOutputsARM instruction. + opBuilder.create(unknownLoc, graphOutputs); + + // Process OpGraphEndARM. 
+ if (!operands.empty()) { + return emitError(unknownLoc, "unexpected operands for OpGraphEndARM"); + } + + curBlock = nullptr; + curGraph = std::nullopt; + graphOutputs.clear(); + + LLVM_DEBUG({ + logger.unindent(); + logger.startLine() + << "//===-------------------------------------------===//\n"; + }); + return success(); +} + std::optional> spirv::Deserializer::getConstant(uint32_t id) { auto constIt = constantMap.find(id); @@ -701,6 +895,14 @@ std::string spirv::Deserializer::getFunctionSymbol(uint32_t id) { return funcName; } +std::string spirv::Deserializer::getGraphSymbol(uint32_t id) { + std::string graphName = nameMap.lookup(id).str(); + if (graphName.empty()) { + graphName = "spirv_graph_" + std::to_string(id); + } + return graphName; +} + std::string spirv::Deserializer::getSpecConstantSymbol(uint32_t id) { auto constName = nameMap.lookup(id).str(); if (constName.empty()) { @@ -723,6 +925,14 @@ spirv::Deserializer::createSpecConstant(Location loc, uint32_t resultID, return op; } +std::optional +spirv::Deserializer::getGraphConstantARM(uint32_t id) { + auto graphConstIt = graphConstantMap.find(id); + if (graphConstIt == graphConstantMap.end()) + return std::nullopt; + return graphConstIt->getSecond(); +} + LogicalResult spirv::Deserializer::processGlobalVariable(ArrayRef operands) { unsigned wordIndex = 0; @@ -944,6 +1154,8 @@ LogicalResult spirv::Deserializer::processType(spirv::Opcode opcode, return processMatrixType(operands); case spirv::Opcode::OpTypeTensorARM: return processTensorARMType(operands); + case spirv::Opcode::OpTypeGraphARM: + return processGraphTypeARM(operands); default: return emitError(unknownLoc, "unhandled type instruction"); } @@ -1311,6 +1523,35 @@ spirv::Deserializer::processTensorARMType(ArrayRef operands) { return success(); } +LogicalResult +spirv::Deserializer::processGraphTypeARM(ArrayRef operands) { + unsigned size = operands.size(); + if (size < 2) { + return emitError(unknownLoc, "OpTypeGraphARM must have at least 2 
operands " "(result_id, num_inputs, (inout0_type, " "inout1_type, ...))") << size; + } + uint32_t numInputs = operands[1]; + SmallVector argTypes; + SmallVector returnTypes; + for (unsigned i = 2; i < size; ++i) { + Type inOutTy = getType(operands[i]); + if (!inOutTy) { + return emitError(unknownLoc, + "OpTypeGraphARM references undefined element type.") + << operands[i]; + } + if (i - 2 >= numInputs) { + returnTypes.push_back(inOutTy); + } else { + argTypes.push_back(inOutTy); + } + } + typeMap[operands[0]] = GraphType::get(context, argTypes, returnTypes); + return success(); +} + LogicalResult spirv::Deserializer::processTypeForwardPointer(ArrayRef operands) { if (operands.size() != 2) @@ -1823,6 +2064,34 @@ spirv::Deserializer::processConstantNull(ArrayRef operands) { << resultType; } +LogicalResult +spirv::Deserializer::processGraphConstantARM(ArrayRef operands) { + if (operands.size() < 3) { + return emitError(unknownLoc) + << "OpGraphConstantARM must have at least 3 operands"; + } + + Type resultType = getType(operands[0]); + if (!resultType) { + return emitError(unknownLoc, "undefined result type from ") + << operands[0]; + } + + uint32_t resultID = operands[1]; + + if (!dyn_cast(resultType)) { + return emitError(unknownLoc, "result must be of type OpTypeTensorARM"); + } + + APInt graph_constant_id = APInt(32, operands[2], /*isSigned=*/true); + Type i32Ty = opBuilder.getIntegerType(32); + IntegerAttr attr = opBuilder.getIntegerAttr(i32Ty, graph_constant_id); + graphConstantMap.try_emplace( + resultID, GraphConstantARMOpMaterializationInfo{resultType, attr}); + + return success(); +} + //===----------------------------------------------------------------------===// // Control flow //===----------------------------------------------------------------------===// @@ -1920,6 +2189,24 @@ LogicalResult spirv::Deserializer::processLabel(ArrayRef operands) { return success(); } +LogicalResult spirv::Deserializer::createGraphBlock(uint32_t graphID) { + if
(!curGraph) { + return emitError(unknownLoc, "a graph block must appear inside a graph"); + } + + // We may have forward declared this block. + Block *block = getOrCreateBlock(graphID); + LLVM_DEBUG(logger.startLine() + << "[block] populating block " << block << "\n"); + // If we have seen this block, make sure it was just a forward declaration. + assert(block->empty() && "re-deserialize the same block!"); + + opBuilder.setInsertionPointToStart(block); + blockMap[graphID] = curBlock = block; + + return success(); +} + LogicalResult spirv::Deserializer::processSelectionMerge(ArrayRef operands) { if (!curBlock) { diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h index db1cc3f8d79c2..6027f1ac94c23 100644 --- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h @@ -106,6 +106,13 @@ struct SpecConstOperationMaterializationInfo { SmallVector enclosedOpOperands; }; +/// A struct that collects the info needed to materialize/emit a +/// GraphConstantARMOp. +struct GraphConstantARMOpMaterializationInfo { + Type resultType; + IntegerAttr graphConstantID; +}; + //===----------------------------------------------------------------------===// // Deserializer Declaration //===----------------------------------------------------------------------===// @@ -211,9 +218,14 @@ class Deserializer { /// exists; otherwise creates one based on the . std::string getFunctionSymbol(uint32_t id); - /// Returns a symbol to be used for the specialization constant with the given - /// result . This tries to use the specialization constant's OpName if + /// Returns a symbol to be used for the graph name with the given + /// result . This tries to use the graph's OpName if /// exists; otherwise creates one based on the . + std::string getGraphSymbol(uint32_t id); + + /// Returns a symbol to be used for the specialization constant with the + /// given result . 
This tries to use the specialization constant's + /// OpName if exists; otherwise creates one based on the . std::string getSpecConstantSymbol(uint32_t id); /// Gets the specialization constant with the given result . @@ -237,6 +249,11 @@ class Deserializer { spirv::SpecConstantOp createSpecConstant(Location loc, uint32_t resultID, TypedAttr defaultValue); + /// Gets the GraphConstantARM ID attribute and result type with the given + /// result . + std::optional + getGraphConstantARM(uint32_t id); + /// Processes the OpVariable instructions at current `offset` into `binary`. /// It is expected that this method is used for variables that are to be /// defined at module scope and will be deserialized into a @@ -306,6 +323,16 @@ class Deserializer { LogicalResult processTensorARMType(ArrayRef operands); + LogicalResult processGraphTypeARM(ArrayRef operands); + + LogicalResult processGraphEntryPointARM(ArrayRef operands); + + LogicalResult processGraphARM(ArrayRef operands); + + LogicalResult processOpGraphSetOutputARM(ArrayRef operands); + + LogicalResult processGraphEndARM(ArrayRef operands); + LogicalResult processTypeForwardPointer(ArrayRef operands); //===--------------------------------------------------------------------===// @@ -353,6 +380,10 @@ class Deserializer { /// Processes a SPIR-V OpConstantNull instruction with the given `operands`. LogicalResult processConstantNull(ArrayRef operands); + /// Processes a SPIR-V OpGraphConstantARM instruction with the given + /// `operands`. + LogicalResult processGraphConstantARM(ArrayRef operands); + //===--------------------------------------------------------------------===// // Debug //===--------------------------------------------------------------------===// @@ -450,6 +481,9 @@ class Deserializer { /// blocks declared as selection/loop headers are handled. LogicalResult structurizeControlFlow(); + /// Creates a block for graph with the given graphID. 
+ LogicalResult createGraphBlock(uint32_t graphID); + //===--------------------------------------------------------------------===// // Instruction //===--------------------------------------------------------------------===// @@ -546,6 +580,9 @@ class Deserializer { /// The current function under construction. std::optional curFunction; + /// The current graph under construction. + std::optional curGraph; + /// The current block under construction. Block *curBlock = nullptr; @@ -599,12 +636,19 @@ class Deserializer { DenseMap specConstOperationMap; + // Result to GraphConstantARM ID attribute and result type. + DenseMap + graphConstantMap; + // Result to variable mapping. DenseMap globalVariableMap; // Result to function mapping. DenseMap funcMap; + // Result to graph mapping. + DenseMap graphMap; + // Result to block mapping. DenseMap blockMap; @@ -668,6 +712,9 @@ class Deserializer { /// Deserialization options. DeserializationOptions options; + /// List of IDs assigned to graph outputs. + SmallVector graphOutputs; + #ifndef NDEBUG /// A logger used to emit information during the deserialzation process.
llvm::ScopedPrinter logger; diff --git a/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp b/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp index d62529b85b3aa..e9b180a70bb23 100644 --- a/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp +++ b/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp @@ -203,6 +203,16 @@ Serializer::processSpecConstantOperationOp(spirv::SpecConstantOperationOp op) { return success(); } +LogicalResult +Serializer::processGraphConstantARMOp(spirv::GraphConstantARMOp op) { + if (uint32_t resultID = prepareGraphConstantId(op.getLoc(), op.getType(), + op.getGraphConstantIdAttr())) { + valueIDMap[op.getResult()] = resultID; + return success(); + } + return failure(); +} + LogicalResult Serializer::processUndefOp(spirv::UndefOp op) { auto undefType = op.getType(); auto &id = undefValIDMap[undefType]; @@ -368,6 +378,118 @@ LogicalResult Serializer::processFuncOp(spirv::FuncOp op) { return success(); } +LogicalResult Serializer::processGraphARMOp(spirv::GraphARMOp op) { + if (op.getNumResults() < 1) { + return op.emitError("cannot serialize graph with no return types"); + } + + LLVM_DEBUG(llvm::dbgs() << "-- start graph '" << op.getName() << "' --\n"); + assert(functionHeader.empty() && functionBody.empty()); + + uint32_t funcID = getOrCreateFunctionID(op.getName()); + uint32_t fnTypeID = 0; + // Generate type of the function. + if (failed(processType(op.getLoc(), op.getFunctionType(), fnTypeID))) + return failure(); + encodeInstructionInto(functionHeader, spirv::Opcode::OpGraphARM, + {fnTypeID, funcID}); + + // Declare the parameters. 
+ for (auto [idx, arg] : llvm::enumerate(op.getArguments())) { + uint32_t argTypeID = 0; + SmallVector inputOperands; + + if (failed(processType(op.getLoc(), arg.getType(), argTypeID))) { + return failure(); + } + + uint32_t argValueID = getNextID(); + valueIDMap[arg] = argValueID; + + auto attr = IntegerAttr::get(IntegerType::get(op.getContext(), 32), idx); + uint32_t indexID = prepareConstantInt(op.getLoc(), attr, false); + + inputOperands.push_back(argTypeID); + inputOperands.push_back(argValueID); + inputOperands.push_back(indexID); + + encodeInstructionInto(functionHeader, spirv::Opcode::OpGraphInputARM, + inputOperands); + } + + if (failed(processBlock(&op.front(), /*omitLabel=*/true))) + return failure(); + if (failed(visitInPrettyBlockOrder( + &op.front(), [&](Block *block) { return processBlock(block); }, + /*skipHeader=*/true))) { + return failure(); + } + + LLVM_DEBUG(llvm::dbgs() << "-- completed graph '" << op.getName() + << "' --\n"); + // Insert OpGraphEndARM. + encodeInstructionInto(functionBody, spirv::Opcode::OpGraphEndARM, {}); + + llvm::append_range(graphs, functionHeader); + llvm::append_range(graphs, functionBody); + functionHeader.clear(); + functionBody.clear(); + + return success(); +} + +LogicalResult +Serializer::processGraphEntryPointARMOp(spirv::GraphEntryPointARMOp op) { + SmallVector operands; + StringRef graph = op.getFn(); + // Add the graph . + uint32_t graphID = getOrCreateFunctionID(graph); + operands.push_back(graphID); + // Add the name of the graph. + spirv::encodeStringLiteralInto(operands, graph); + + // Add the interface values. + if (ArrayAttr interface = op.getInterface()) { + for (Attribute var : interface.getValue()) { + StringRef value = cast(var).getValue(); + if (uint32_t id = getVariableID(value)) { + operands.push_back(id); + } else { + return op.emitError( + "referencing undefined global variable." + "spirv.GraphEntryPointARM is at the end of spirv.module. 
All " + "referenced variables should already be defined"); + } + } + } + encodeInstructionInto(graphs, spirv::Opcode::OpGraphEntryPointARM, operands); + return success(); +} + +LogicalResult +Serializer::processGraphOutputsARMOp(spirv::GraphOutputsARMOp op) { + for (auto [idx, value] : llvm::enumerate(op->getOperands())) { + SmallVector outputOperands; + + Type resType = value.getType(); + uint32_t resTypeID = 0; + if (failed(processType(op.getLoc(), resType, resTypeID))) { + return failure(); + } + + uint32_t outputID = getValueID(value); + auto attr = IntegerAttr::get(IntegerType::get(op.getContext(), 32), idx); + uint32_t indexID = prepareConstantInt(op.getLoc(), attr, false); + + outputOperands.push_back(outputID); + outputOperands.push_back(indexID); + + encodeInstructionInto(functionBody, spirv::Opcode::OpGraphSetOutputARM, + outputOperands); + } + return success(); +} + LogicalResult Serializer::processVariableOp(spirv::VariableOp op) { SmallVector operands; SmallVector elidedAttrs; diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp index 7fc779587f4f1..b56e7788625f5 100644 --- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp @@ -136,7 +136,7 @@ void Serializer::collect(SmallVectorImpl &binary) { extensions.size() + extendedSets.size() + memoryModel.size() + entryPoints.size() + executionModes.size() + decorations.size() + - typesGlobalValues.size() + functions.size(); + typesGlobalValues.size() + functions.size() + graphs.size(); binary.clear(); binary.reserve(moduleSize); @@ -154,6 +154,7 @@ void Serializer::collect(SmallVectorImpl &binary) { binary.append(decorations.begin(), decorations.end()); binary.append(typesGlobalValues.begin(), typesGlobalValues.end()); binary.append(functions.begin(), functions.end()); + binary.append(graphs.begin(), graphs.end()); } #ifndef NDEBUG @@ -509,6 +510,9 @@ Serializer::processTypeImpl(Location 
loc, Type type, uint32_t &typeID, if ((isa(type) && succeeded(prepareFunctionType(loc, cast(type), typeEnum, operands))) || + (isa(type) && + succeeded( + prepareGraphType(loc, cast(type), typeEnum, operands))) || succeeded(prepareBasicType(loc, type, typeID, typeEnum, operands, deferSerialization, serializationCtx))) { if (deferSerialization) @@ -539,7 +543,7 @@ Serializer::processTypeImpl(Location loc, Type type, uint32_t &typeID, return success(); } - return failure(); + return emitError(loc, "failed to process type: ") << type; } LogicalResult Serializer::prepareBasicType( @@ -875,6 +879,33 @@ Serializer::prepareFunctionType(Location loc, FunctionType type, return success(); } +LogicalResult +Serializer::prepareGraphType(Location loc, GraphType type, + spirv::Opcode &typeEnum, + SmallVectorImpl &operands) { + typeEnum = spirv::Opcode::OpTypeGraphARM; + assert(type.getNumResults() >= 1 && + "serialization requires at least a return value"); + + operands.push_back(type.getNumInputs()); + + for (Type argType : type.getInputs()) { + uint32_t argTypeID = 0; + if (failed(processType(loc, argType, argTypeID))) + return failure(); + operands.push_back(argTypeID); + } + + for (Type resType : type.getResults()) { + uint32_t resTypeID = 0; + if (failed(processType(loc, resType, resTypeID))) + return failure(); + operands.push_back(resTypeID); + } + + return success(); +} + //===----------------------------------------------------------------------===// // Constant //===----------------------------------------------------------------------===// @@ -1135,6 +1166,41 @@ uint32_t Serializer::prepareConstantInt(Location loc, IntegerAttr intAttr, return resultID; } +uint32_t Serializer::prepareGraphConstantId(Location loc, Type graphConstType, + IntegerAttr intAttr) { + // De-duplicate graph constants. + if (uint32_t id = getGraphConstantARMId(intAttr)) { + return id; + } + + // Process the type for this graph constant. 
+ uint32_t typeID = 0; + if (failed(processType(loc, graphConstType, typeID))) { + return 0; + } + + uint32_t resultID = getNextID(); + APInt value = intAttr.getValue(); + unsigned bitwidth = value.getBitWidth(); + if (bitwidth > 32) { + emitError(loc, "Too wide attribute for OpGraphConstantARM: ") + << bitwidth << " bits"; + return 0; + } + bool isSigned = value.isSignedIntN(bitwidth); + + uint32_t word = 0; + if (isSigned) { + word = static_cast(value.getSExtValue()); + } else { + word = static_cast(value.getZExtValue()); + } + encodeInstructionInto(typesGlobalValues, spirv::Opcode::OpGraphConstantARM, + {typeID, resultID, word}); + graphConstIDMap[intAttr] = resultID; + return resultID; +} + uint32_t Serializer::prepareConstantFp(Location loc, FloatAttr floatAttr, bool isSpec) { if (!isSpec) { @@ -1469,9 +1535,19 @@ LogicalResult Serializer::processOperation(Operation *opInst) { return processConstantCompositeReplicateOp(op); }) .Case([&](spirv::FuncOp op) { return processFuncOp(op); }) + .Case([&](spirv::GraphARMOp op) { return processGraphARMOp(op); }) + .Case([&](spirv::GraphEntryPointARMOp op) { + return processGraphEntryPointARMOp(op); + }) + .Case([&](spirv::GraphOutputsARMOp op) { + return processGraphOutputsARMOp(op); + }) .Case([&](spirv::GlobalVariableOp op) { return processGlobalVariableOp(op); }) + .Case([&](spirv::GraphConstantARMOp op) { + return processGraphConstantARMOp(op); + }) .Case([&](spirv::LoopOp op) { return processLoopOp(op); }) .Case([&](spirv::ReferenceOfOp op) { return processReferenceOfOp(op); }) .Case([&](spirv::SelectionOp op) { return processSelectionOp(op); }) diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.h b/mlir/lib/Target/SPIRV/Serialization/Serializer.h index fb2cecdff8e43..add372b19b5af 100644 --- a/mlir/lib/Target/SPIRV/Serialization/Serializer.h +++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.h @@ -122,6 +122,8 @@ class Serializer { LogicalResult 
processSpecConstantOperationOp(spirv::SpecConstantOperationOp op); + LogicalResult processGraphConstantARMOp(spirv::GraphConstantARMOp op); + /// SPIR-V dialect supports OpUndef using spirv.UndefOp that produces a SSA /// value to use with other operations. The SPIR-V spec recommends that /// OpUndef be generated at module level. The serialization generates an @@ -135,6 +137,15 @@ class Serializer { LogicalResult processFuncOp(spirv::FuncOp op); LogicalResult processFuncParameter(spirv::FuncOp op); + /// Processes a SPIR-V GraphARM op. + LogicalResult processGraphARMOp(spirv::GraphARMOp op); + + /// Processes a SPIR-V GraphEntryPointARM op. + LogicalResult processGraphEntryPointARMOp(spirv::GraphEntryPointARMOp op); + + /// Processes a SPIR-V GraphOutputsARMOp op. + LogicalResult processGraphOutputsARMOp(spirv::GraphOutputsARMOp op); + LogicalResult processVariableOp(spirv::VariableOp op); /// Process a SPIR-V GlobalVariableOp @@ -189,6 +200,10 @@ class Serializer { spirv::Opcode &typeEnum, SmallVectorImpl &operands); + LogicalResult prepareGraphType(Location loc, GraphType type, + spirv::Opcode &typeEnum, + SmallVectorImpl &operands); + //===--------------------------------------------------------------------===// // Constant //===--------------------------------------------------------------------===// @@ -238,6 +253,13 @@ class Serializer { uint32_t prepareConstantInt(Location loc, IntegerAttr intAttr, bool isSpec = false); + uint32_t getGraphConstantARMId(Attribute value) const { + return graphConstIDMap.lookup(value); + } + + uint32_t prepareGraphConstantId(Location loc, Type graphConstType, + IntegerAttr intAttr); + uint32_t prepareConstantFp(Location loc, FloatAttr floatAttr, bool isSpec = false); @@ -372,6 +394,7 @@ class Serializer { SmallVector decorations; SmallVector typesGlobalValues; SmallVector functions; + SmallVector graphs; /// Recursive struct references are serialized as OpTypePointer instructions /// to the recursive struct type. 
However, the OpTypePointer instruction @@ -388,15 +411,22 @@ class Serializer { recursiveStructInfos; /// `functionHeader` contains all the instructions that must be in the first - /// block in the function, and `functionBody` contains the rest. After - /// processing FuncOp, the encoded instructions of a function are appended to - /// `functions`. An example of instructions in `functionHeader` in order: + /// block in the function or graph, and `functionBody` contains the rest. + /// After processing FuncOp/GraphARMOp, the encoded instructions of a function + /// or graph are appended to `functions` or `graphs` respectively. Examples of + /// instructions in `functionHeader` in order: + /// + /// For a FuncOp: /// OpFunction ... /// OpFunctionParameter ... /// OpFunctionParameter ... /// OpLabel ... /// OpVariable ... /// OpVariable ... + /// + /// For a GraphARMOp + /// OpGraphARM ... + /// OpGraphInputARM ... SmallVector functionHeader; SmallVector functionBody; @@ -412,6 +442,9 @@ class Serializer { /// Map from specialization constant names to their s. llvm::StringMap specConstIDMap; + /// Map from graph constant ID value to their s. + DenseMap graphConstIDMap; + /// Map from GlobalVariableOps name to s. 
llvm::StringMap globalVarIDMap; diff --git a/mlir/test/Target/SPIRV/graph-ops.mlir b/mlir/test/Target/SPIRV/graph-ops.mlir new file mode 100644 index 0000000000000..c956157bfa6c1 --- /dev/null +++ b/mlir/test/Target/SPIRV/graph-ops.mlir @@ -0,0 +1,25 @@ +// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip %s | FileCheck %s +// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv %s | spirv-val %} + +// CHECK: spirv.module Logical Vulkan requires #spirv.vce { +spirv.module Logical Vulkan requires #spirv.vce { + // CHECK: spirv.GlobalVariable [[VARARG0:@.*]] bind(0, 0) : !spirv.ptr, UniformConstant> + spirv.GlobalVariable @main_arg_0 bind(0, 0) : !spirv.ptr, UniformConstant> + // CHECK: spirv.GlobalVariable [[VARRES0:@.*]] bind(0, 1) : !spirv.ptr, UniformConstant> + spirv.GlobalVariable @main_res_0 bind(0, 1) : !spirv.ptr, UniformConstant> + // CHECK: spirv.ARM.GraphEntryPoint [[GN:@.*]], [[VARARG0]], [[VARRES0]] + spirv.ARM.GraphEntryPoint @main, @main_arg_0, @main_res_0 + // CHECK: spirv.ARM.Graph [[GN]]({{%.*}}: !spirv.arm.tensor<14x19xi16>) -> !spirv.arm.tensor<2x3xi16> attributes {entry_point = true} { + spirv.ARM.Graph @main(%arg0 : !spirv.arm.tensor<14x19xi16>) -> !spirv.arm.tensor<2x3xi16> attributes {entry_point = true} { + // CHECK: [[CONST2:%.*]] = spirv.ARM.GraphConstant {graph_constant_id = 42 : i32} : !spirv.arm.tensor<2x3xi16> + %0 = spirv.ARM.GraphConstant { graph_constant_id = 42 : i32 } : !spirv.arm.tensor<2x3xi16> + // CHECK: spirv.ARM.GraphOutputs [[OUT:%.*]] : !spirv.arm.tensor<2x3xi16> + spirv.ARM.GraphOutputs %0 : !spirv.arm.tensor<2x3xi16> + } + + // CHECK: spirv.ARM.Graph {{@.*}}({{%.*}}: !spirv.arm.tensor<1x16x16x16xi8>) -> !spirv.arm.tensor<1x16x16x16xi8> attributes {entry_point = false} { + spirv.ARM.Graph @empty_graph(%arg0: !spirv.arm.tensor<1x16x16x16xi8>) -> !spirv.arm.tensor<1x16x16x16xi8> { + // CHECK: spirv.ARM.GraphOutputs {{%.*}} : !spirv.arm.tensor<1x16x16x16xi8> + spirv.ARM.GraphOutputs 
%arg0 : !spirv.arm.tensor<1x16x16x16xi8> + } +} From 248ad71747e0e0f49876e7d95905219822f400a6 Mon Sep 17 00:00:00 2001 From: Amina Chabane Date: Fri, 12 Sep 2025 14:53:54 +0100 Subject: [PATCH 127/734] [AArch64] Correct SCVTF/UCVTF instructions for vector input (#152974) This pull request improves support for scalar floating-point conversions from integer vectors on AArch64, specifically for the `scvtf` and `ucvtf` instructions. It fixes pattern matching so that single-element conversions from vectors now generate the expected scalar instructions and adds a new test to verify correct behavior for extracting a lane from a widened vector. **Pattern matching and code generation improvements:** * Added new patterns in `AArch64InstrInfo.td` to correctly match conversions from `v2i32` to `v1f64` using `scvtf` and `ucvtf`, ensuring the scalar instructions (`scvtf d0, s0` and `ucvtf d0, s0`) are generated when extracting a single lane. **Test updates and additions:** * Updated `scvtf_f64i32_simple` and `ucvtf_f64i32_simple` tests in `fprcvt-cvtf.ll` to reflect the correct generation of scalar instructions, removing previous comments about incorrect codegen and showing the expected output. * Added a new test `uitofp_sext_v2i32_extract_lane0` to verify correct code generation when extracting a lane from a widened vector and converting to double. 
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 5 +++ llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll | 37 +++++++++++++-------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f0020a9a3c91d..3fcafc6d35090 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5409,6 +5409,11 @@ defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>; let Predicates = [HasNEON, HasFPRCVT] in { defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>; defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>; + + def : Pat<(v1f64 (extract_subvector (v2f64 (sint_to_fp (v2i64 (sext (v2i32 V64:$Rn))))), (i64 0))), + (SCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>; + def : Pat<(v1f64 (extract_subvector (v2f64 (uint_to_fp (v2i64 (zext (v2i32 V64:$Rn))))), (i64 0))), + (UCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>; } def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), diff --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll index 9da6f583cec01..3ea1a01cfc977 100644 --- a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll +++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll @@ -94,16 +94,10 @@ define double @scvtf_f64i32_neg(<4 x i32> %x) { ret double %conv } -; This test does not give the indended result of scvtf d0, s0 -; This is due to the input being loaded as a 2 item vector and -; therefore using vector inputs that do not match the pattern -; This test will be fixed in a future revision define <1 x double> @scvtf_f64i32_simple(<1 x i32> %x) { ; CHECK-LABEL: scvtf_f64i32_simple: ; CHECK: // %bb.0: -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: scvtf v0.2d, v0.2d -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: scvtf d0, s0 ; CHECK-NEXT: ret ; ; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_simple: @@ -315,16 +309,10 @@ 
define double @ucvtf_f64i32_neg(<4 x i32> %x) { ret double %conv } -; This test does not give the indended result of ucvtf d0, s0 -; This is due to the input being loaded as a 2 item vector and -; therefore using vector inputs that do not match the pattern -; This test will be fixed in a future revision define <1 x double> @ucvtf_f64i32_simple(<1 x i32> %x) { ; CHECK-LABEL: ucvtf_f64i32_simple: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ucvtf v0.2d, v0.2d -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ucvtf d0, s0 ; CHECK-NEXT: ret ; ; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_simple: @@ -449,3 +437,24 @@ define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) { %conv = uitofp <1 x i64> %x to <1 x float> ret <1 x float> %conv } + +define <1 x double> @uitofp_sext_v2i32_extract_lane0(<2 x i32> %x) { +; CHECK-LABEL: uitofp_sext_v2i32_extract_lane0: +; CHECK: // %bb.0: +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret +; +; CHECK-NO-FPRCVT-LABEL: uitofp_sext_v2i32_extract_lane0: +; CHECK-NO-FPRCVT: // %bb.0: +; CHECK-NO-FPRCVT-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NO-FPRCVT-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NO-FPRCVT-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NO-FPRCVT-NEXT: ret + %wide = sext <2 x i32> %x to <2 x i64> + %fpv2 = uitofp <2 x i64> %wide to <2 x double> + %lane0 = shufflevector <2 x double> %fpv2, <2 x double> poison, <1 x i32> zeroinitializer + ret <1 x double> %lane0 +} + From 8036edb21dbedf79687613caef3d40aa5a50ddf2 Mon Sep 17 00:00:00 2001 From: Davide Grohmann Date: Fri, 12 Sep 2025 15:54:04 +0200 Subject: [PATCH 128/734] [mlir][spirv] Add support for SPV_ARM_graph extension - part 3 (#156845) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the third patch to add support for the `SPV_ARM_graph` SPIR-V extension to MLIR’s SPIR-V dialect. 
The extension introduces a new `Graph` abstraction for expressing dataflow computations over full resources. The part 3 implementation includes: - ABI lowering support for graph entry points via `LowerABIAttributesPass`. - Tests covering ABI handling. Graphs currently support only `SPV_ARM_tensors`, but are designed to generalize to other resource types, such as images. Spec: https://github.com/KhronosGroup/SPIRV-Registry/pull/346 RFC: https://discourse.llvm.org/t/rfc-add-support-for-spv-arm-graph-extension-in-mlir-spir-v-dialect/86947 --------- Signed-off-by: Davide Grohmann --- mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp | 9 +- .../Transforms/LowerABIAttributesPass.cpp | 115 +++++++++++++++++- .../test/Dialect/SPIRV/IR/target-and-abi.mlir | 8 ++ .../SPIRV/Transforms/abi-interface.mlir | 22 ++++ 4 files changed, 150 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp index fcf1526491971..44c86bc8777e4 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp @@ -1066,7 +1066,12 @@ LogicalResult SPIRVDialect::verifyRegionArgAttribute(Operation *op, } LogicalResult SPIRVDialect::verifyRegionResultAttribute( - Operation *op, unsigned /*regionIndex*/, unsigned /*resultIndex*/, + Operation *op, unsigned /*regionIndex*/, unsigned resultIndex, NamedAttribute attribute) { - return op->emitError("cannot attach SPIR-V attributes to region result"); + if (auto graphOp = dyn_cast(op)) + return verifyRegionAttribute( + op->getLoc(), graphOp.getResultTypes()[resultIndex], attribute); + return op->emitError( + "cannot attach SPIR-V attributes to region result which is " + "not part of a spirv::GraphARMOp type"); } diff --git a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp index 3911ec08fcc27..5607a3cd3660f 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp 
+++ b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp @@ -22,6 +22,7 @@ #include "mlir/Dialect/SPIRV/Utils/LayoutUtils.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/Support/FormatVariadic.h" namespace mlir { namespace spirv { @@ -85,10 +86,36 @@ createGlobalVarForEntryPointArgument(OpBuilder &builder, spirv::FuncOp funcOp, abiInfo.getBinding()); } +/// Creates a global variable for an argument or result based on the ABI info. +static spirv::GlobalVariableOp +createGlobalVarForGraphEntryPoint(OpBuilder &builder, spirv::GraphARMOp graphOp, + unsigned index, bool isArg, + spirv::InterfaceVarABIAttr abiInfo) { + auto spirvModule = graphOp->getParentOfType(); + if (!spirvModule) + return nullptr; + + OpBuilder::InsertionGuard moduleInsertionGuard(builder); + builder.setInsertionPoint(graphOp.getOperation()); + std::string varName = llvm::formatv("{}_{}_{}", graphOp.getName(), + isArg ? "arg" : "res", index); + + Type varType = isArg ? graphOp.getFunctionType().getInput(index) + : graphOp.getFunctionType().getResult(index); + + auto pointerType = spirv::PointerType::get( + varType, + abiInfo.getStorageClass().value_or(spirv::StorageClass::UniformConstant)); + + return spirv::GlobalVariableOp::create(builder, graphOp.getLoc(), pointerType, + varName, abiInfo.getDescriptorSet(), + abiInfo.getBinding()); +} + /// Gets the global variables that need to be specified as interface variable /// with an spirv.EntryPointOp. Traverses the body of a entry function to do so. 
static LogicalResult -getInterfaceVariables(spirv::FuncOp funcOp, +getInterfaceVariables(mlir::FunctionOpInterface funcOp, SmallVectorImpl &interfaceVars) { auto module = funcOp->getParentOfType(); if (!module) { @@ -224,6 +251,21 @@ class ProcessInterfaceVarABI final : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override; }; +/// A pattern to convert graph signature according to interface variable ABI +/// attributes. +/// +/// Specifically, this pattern creates global variables according to interface +/// variable ABI attributes attached to graph arguments and results. +class ProcessGraphInterfaceVarABI final + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(spirv::GraphARMOp graphOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override; +}; + /// Pass to implement the ABI information specified as attributes. class LowerABIAttributesPass final : public spirv::impl::SPIRVLowerABIAttributesPassBase< @@ -297,6 +339,63 @@ LogicalResult ProcessInterfaceVarABI::matchAndRewrite( return success(); } +LogicalResult ProcessGraphInterfaceVarABI::matchAndRewrite( + spirv::GraphARMOp graphOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const { + // Non-entry point graphs are not handled. + if (!graphOp.getEntryPoint().value_or(false)) + return failure(); + + TypeConverter::SignatureConversion signatureConverter( + graphOp.getFunctionType().getNumInputs()); + + StringRef attrName = spirv::getInterfaceVarABIAttrName(); + SmallVector interfaceVars; + + // Convert arguments. 
+ unsigned numInputs = graphOp.getFunctionType().getNumInputs(); + unsigned numResults = graphOp.getFunctionType().getNumResults(); + for (unsigned index = 0; index < numInputs; ++index) { + auto abiInfo = + graphOp.getArgAttrOfType(index, attrName); + if (!abiInfo) + return failure(); + spirv::GlobalVariableOp var = createGlobalVarForGraphEntryPoint( + rewriter, graphOp, index, true, abiInfo); + if (!var) + return failure(); + interfaceVars.push_back( + SymbolRefAttr::get(rewriter.getContext(), var.getSymName())); + } + + for (unsigned index = 0; index < numResults; ++index) { + auto abiInfo = graphOp.getResultAttrOfType( + index, attrName); + if (!abiInfo) + return failure(); + spirv::GlobalVariableOp var = createGlobalVarForGraphEntryPoint( + rewriter, graphOp, index, false, abiInfo); + if (!var) + return failure(); + interfaceVars.push_back( + SymbolRefAttr::get(rewriter.getContext(), var.getSymName())); + } + + // Update graph signature. + rewriter.modifyOpInPlace(graphOp, [&] { + for (unsigned index = 0; index < numInputs; ++index) { + graphOp.removeArgAttr(index, attrName); + } + for (unsigned index = 0; index < numResults; ++index) { + graphOp.removeResultAttr(index, rewriter.getStringAttr(attrName)); + } + }); + + spirv::GraphEntryPointARMOp::create(rewriter, graphOp.getLoc(), graphOp, + interfaceVars); + return success(); +} + void LowerABIAttributesPass::runOnOperation() { // Uses the signature conversion methodology of the dialect conversion // framework to implement the conversion. @@ -322,7 +421,8 @@ void LowerABIAttributesPass::runOnOperation() { }); RewritePatternSet patterns(context); - patterns.add(typeConverter, context); + patterns.add( + typeConverter, context); ConversionTarget target(*context); // "Legal" function ops should have no interface variable ABI attributes. 
@@ -333,6 +433,17 @@ void LowerABIAttributesPass::runOnOperation() { return false; return true; }); + target.addDynamicallyLegalOp([&](spirv::GraphARMOp op) { + StringRef attrName = spirv::getInterfaceVarABIAttrName(); + for (unsigned i = 0, e = op.getNumArguments(); i < e; ++i) + if (op.getArgAttr(i, attrName)) + return false; + for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) + if (op.getResultAttr(i, attrName)) + return false; + return true; + }); + // All other SPIR-V ops are legal. target.markUnknownOpDynamicallyLegal([](Operation *op) { return op->getDialect()->getNamespace() == diff --git a/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir b/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir index 10fbcf06eb052..63dea6af83556 100644 --- a/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir +++ b/mlir/test/Dialect/SPIRV/IR/target-and-abi.mlir @@ -101,6 +101,14 @@ func.func @interface_var( // ----- +// CHECK: {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>} +// CHECK: {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 1)>} +spirv.ARM.Graph @interface_var(%arg: !spirv.arm.tensor<1xf32> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>}) -> ( + !spirv.arm.tensor<1xf32> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 1)>} +) { spirv.ARM.GraphOutputs %arg : !spirv.arm.tensor<1xf32> } + +// ----- + //===----------------------------------------------------------------------===// // spirv.resource_limits //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir index f3a3218e5aec0..04667c828bbd1 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir @@ -35,6 +35,28 @@ spirv.module Logical GLSL450 { // ----- +module attributes { + spirv.target_env = #spirv.target_env< + #spirv.vce, #spirv.resource_limits<>> +} { + +// 
CHECK-LABEL: spirv.module +spirv.module Logical Vulkan { + // CHECK-DAG: spirv.GlobalVariable [[VARARG0:@.*]] bind(0, 0) : !spirv.ptr, UniformConstant> + // CHECK-DAG: spirv.GlobalVariable [[VARRES0:@.*]] bind(0, 1) : !spirv.ptr, UniformConstant> + + // CHECK: spirv.ARM.GraphEntryPoint [[GN:@.*]], [[VARARG0]], [[VARRES0]] + // CHECK: spirv.ARM.Graph [[GN]]([[ARG0:%.*]]: !spirv.arm.tensor<1x16x16x16xi8>) -> !spirv.arm.tensor<1x16x16x16xi8> attributes {entry_point = true} + spirv.ARM.Graph @main(%arg0: !spirv.arm.tensor<1x16x16x16xi8> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>}) + -> (!spirv.arm.tensor<1x16x16x16xi8> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 1)>}) attributes {entry_point = true} { + spirv.ARM.GraphOutputs %arg0 : !spirv.arm.tensor<1x16x16x16xi8> + } +} // end spirv.module + +} // end module + +// ----- + module { // expected-error@+1 {{'spirv.module' op missing SPIR-V target env attribute}} spirv.module Logical GLSL450 {} From acd08993b2ab7e5d64a7932fb5b4630f51eedc72 Mon Sep 17 00:00:00 2001 From: Abhinav Pappu <53156924+abhinavp5@users.noreply.github.com> Date: Fri, 12 Sep 2025 10:09:13 -0400 Subject: [PATCH 129/734] [X86] Add F16C f16 -> f32 constexpr support (#158142) Fixes #154310 --- clang/lib/Headers/f16cintrin.h | 16 +++++++++++++--- clang/test/CodeGen/X86/f16c-builtins.c | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) mode change 100644 => 100755 clang/test/CodeGen/X86/f16c-builtins.c diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h index ede67afada766..83965334e2c9b 100644 --- a/clang/lib/Headers/f16cintrin.h +++ b/clang/lib/Headers/f16cintrin.h @@ -20,6 +20,14 @@ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR 
__DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h, * but that's because icc can emulate these without f16c using a library call. * Since we don't do that let's leave these in f16cintrin.h. @@ -35,7 +43,7 @@ /// \param __a /// A 16-bit half-precision float value. /// \returns The converted 32-bit float value. -static __inline float __DEFAULT_FN_ATTRS128 +static __inline float __DEFAULT_FN_ATTRS128_CONSTEXPR _cvtsh_ss(unsigned short __a) { return (float)__builtin_bit_cast(__fp16, __a); @@ -104,7 +112,7 @@ _cvtsh_ss(unsigned short __a) /// A 128-bit vector containing 16-bit half-precision float values. The lower /// 64 bits are used in the conversion. /// \returns A 128-bit vector of [4 x float] containing converted float values. -static __inline __m128 __DEFAULT_FN_ATTRS128 +static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtph_ps(__m128i __a) { typedef __fp16 __v4fp16 __attribute__((__vector_size__(8))); @@ -151,7 +159,7 @@ _mm_cvtph_ps(__m128i __a) /// converted to 32-bit single-precision float values. /// \returns A vector of [8 x float] containing the converted 32-bit /// single-precision float values. 
-static __inline __m256 __DEFAULT_FN_ATTRS256 +static __inline __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtph_ps(__m128i __a) { typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16))); @@ -161,5 +169,7 @@ _mm256_cvtph_ps(__m128i __a) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __F16CINTRIN_H */ diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c old mode 100644 new mode 100755 index 6a696273cb3c8..c08ef76d56981 --- a/clang/test/CodeGen/X86/f16c-builtins.c +++ b/clang/test/CodeGen/X86/f16c-builtins.c @@ -10,6 +10,7 @@ #include +#include "builtin_test_helpers.h" float test_cvtsh_ss(unsigned short a) { // CHECK-LABEL: test_cvtsh_ss @@ -18,6 +19,10 @@ float test_cvtsh_ss(unsigned short a) { return _cvtsh_ss(a); } +TEST_CONSTEXPR(_cvtsh_ss(0x0000) == 0.0f); +TEST_CONSTEXPR(_cvtsh_ss(0x4500) == 5.0f); +TEST_CONSTEXPR(_cvtsh_ss(0xC000) == -2.0f); + unsigned short test_cvtss_sh(float a) { // CHECK-LABEL: test_cvtss_sh // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0 @@ -29,6 +34,11 @@ unsigned short test_cvtss_sh(float a) { return _cvtss_sh(a, 0); } +TEST_CONSTEXPR(match_m128( + _mm_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0, 0, 0, 0)), + 1.0f, 2.0f, 3.0f, 4.0f +)); + __m128 test_mm_cvtph_ps(__m128i a) { // CHECK-LABEL: test_mm_cvtph_ps // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> @@ -41,6 +51,10 @@ __m256 test_mm256_cvtph_ps(__m128i a) { // CHECK: fpext <8 x half> %{{.*}} to <8 x float> return _mm256_cvtph_ps(a); } +TEST_CONSTEXPR(match_m256( + _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)), + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f +)); __m128i test_mm_cvtps_ph(__m128 a) { // CHECK-LABEL: test_mm_cvtps_ph From da82d72a3dd986b90b179fda142114b540243213 Mon Sep 17 00:00:00 2001 From: Matthias 
Springer Date: Fri, 12 Sep 2025 15:28:02 +0100 Subject: [PATCH 130/734] [mlir][Transforms] Fix crash in `reconcile-unrealized-casts` (#158298) The `reconcile-unrealized-casts` pass used to crash when the input contains circular chains of `unrealized_conversion_cast` ops. Furthermore, the `reconcileUnrealizedCasts` helper functions used to erase ops that were not passed via the `castOps` operand. Such ops are now preserved. That's why some integration tests had to be changed. Also avoid copying the set of all unresolved materializations in `convertOperations`. This commit is in preparation of turning `RewriterBase::replaceOp` into a non-virtual function. This is a re-upload of #158067, which was reverted due to CI failures. Note for LLVM integration: If you are seeing tests that are failing with `error: LLVM Translation failed for operation: builtin.unrealized_conversion_cast`, you may have to add the `-reconcile-unrealized-casts` pass to your pass pipeline. (Or switch to the `-convert-to-llvm` pass instead of combining the various `-convert-*-to-llvm` passes.) --------- Co-authored-by: Mehdi Amini --- .../mlir/Transforms/DialectConversion.h | 3 + .../Transforms/Utils/DialectConversion.cpp | 151 +++++++++++++----- .../reconcile-unrealized-casts.mlir | 50 ++++++ ...assume-alignment-runtime-verification.mlir | 3 +- .../atomic-rmw-runtime-verification.mlir | 3 +- .../MemRef/store-runtime-verification.mlir | 3 +- .../lib/Pass/TestVulkanRunnerPipeline.cpp | 2 + 7 files changed, 173 insertions(+), 42 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index a096f82a4cfd8..f8caae3ce9995 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -1428,6 +1428,9 @@ struct ConversionConfig { /// /// In the above example, %0 can be used instead of %3 and all cast ops are /// folded away. 
+void reconcileUnrealizedCasts( + const DenseSet &castOps, + SmallVectorImpl *remainingCastOps = nullptr); void reconcileUnrealizedCasts( ArrayRef castOps, SmallVectorImpl *remainingCastOps = nullptr); diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index df9700f11200f..d53e1e78f2027 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -3100,6 +3100,7 @@ unsigned OperationLegalizer::applyCostModelToPatterns( //===----------------------------------------------------------------------===// // OperationConverter //===----------------------------------------------------------------------===// + namespace { enum OpConversionMode { /// In this mode, the conversion will ignore failed conversions to allow @@ -3117,6 +3118,13 @@ enum OpConversionMode { } // namespace namespace mlir { + +// Predeclaration only. +static void reconcileUnrealizedCasts( + const DenseMap + &castOps, + SmallVectorImpl *remainingCastOps); + // This class converts operations to a given conversion target via a set of // rewrite patterns. The conversion behaves differently depending on the // conversion mode. @@ -3264,18 +3272,13 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { // After a successful conversion, apply rewrites. rewriterImpl.applyRewrites(); - // Gather all unresolved materializations. - SmallVector allCastOps; - const DenseMap - &materializations = rewriterImpl.unresolvedMaterializations; - for (auto it : materializations) - allCastOps.push_back(it.first); - // Reconcile all UnrealizedConversionCastOps that were inserted by the - // dialect conversion frameworks. (Not the one that were inserted by + // dialect conversion frameworks. (Not the ones that were inserted by // patterns.) 
+ const DenseMap + &materializations = rewriterImpl.unresolvedMaterializations; SmallVector remainingCastOps; - reconcileUnrealizedCasts(allCastOps, &remainingCastOps); + reconcileUnrealizedCasts(materializations, &remainingCastOps); // Drop markers. for (UnrealizedConversionCastOp castOp : remainingCastOps) @@ -3303,20 +3306,19 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { // Reconcile Unrealized Casts //===----------------------------------------------------------------------===// -void mlir::reconcileUnrealizedCasts( - ArrayRef castOps, +/// Try to reconcile all given UnrealizedConversionCastOps and store the +/// left-over ops in `remainingCastOps` (if provided). See documentation in +/// DialectConversion.h for more details. +/// The `isCastOpOfInterestFn` is used to filter the cast ops to proceed: the +/// algorithm may visit an operand (or user) which is a cast op, but will not +/// try to reconcile it if not in the filtered set. +template +static void reconcileUnrealizedCastsImpl( + RangeT castOps, + function_ref isCastOpOfInterestFn, SmallVectorImpl *remainingCastOps) { + // A worklist of cast ops to process. SetVector worklist(llvm::from_range, castOps); - // This set is maintained only if `remainingCastOps` is provided. - DenseSet erasedOps; - - // Helper function that adds all operands to the worklist that are an - // unrealized_conversion_cast op result. - auto enqueueOperands = [&](UnrealizedConversionCastOp castOp) { - for (Value v : castOp.getInputs()) - if (auto inputCastOp = v.getDefiningOp()) - worklist.insert(inputCastOp); - }; // Helper function that return the unrealized_conversion_cast op that // defines all inputs of the given op (in the same order). Return "nullptr" @@ -3337,39 +3339,110 @@ void mlir::reconcileUnrealizedCasts( // Process ops in the worklist bottom-to-top. 
while (!worklist.empty()) { UnrealizedConversionCastOp castOp = worklist.pop_back_val(); - if (castOp->use_empty()) { - // DCE: If the op has no users, erase it. Add the operands to the - // worklist to find additional DCE opportunities. - enqueueOperands(castOp); - if (remainingCastOps) - erasedOps.insert(castOp.getOperation()); - castOp->erase(); - continue; - } // Traverse the chain of input cast ops to see if an op with the same // input types can be found. UnrealizedConversionCastOp nextCast = castOp; while (nextCast) { if (nextCast.getInputs().getTypes() == castOp.getResultTypes()) { + if (llvm::any_of(nextCast.getInputs(), [&](Value v) { + return v.getDefiningOp() == castOp; + })) { + // Ran into a cycle. + break; + } + // Found a cast where the input types match the output types of the - // matched op. We can directly use those inputs and the matched op can - // be removed. - enqueueOperands(castOp); + // matched op. We can directly use those inputs. castOp.replaceAllUsesWith(nextCast.getInputs()); - if (remainingCastOps) - erasedOps.insert(castOp.getOperation()); - castOp->erase(); break; } nextCast = getInputCast(nextCast); } } - if (remainingCastOps) - for (UnrealizedConversionCastOp op : castOps) - if (!erasedOps.contains(op.getOperation())) + // A set of all alive cast ops. I.e., ops whose results are (transitively) + // used by an op that is not a cast op. + DenseSet liveOps; + + // Helper function that marks the given op and transitively reachable input + // cast ops as alive. + auto markOpLive = [&](Operation *rootOp) { + SmallVector worklist; + worklist.push_back(rootOp); + while (!worklist.empty()) { + Operation *op = worklist.pop_back_val(); + if (liveOps.insert(op).second) { + // Successfully inserted: process reachable input cast ops. + for (Value v : op->getOperands()) + if (auto castOp = v.getDefiningOp()) + if (isCastOpOfInterestFn(castOp)) + worklist.push_back(castOp); + } + } + }; + + // Find all alive cast ops. 
+ for (UnrealizedConversionCastOp op : castOps) { + // The op may have been marked live already as being an operand of another + // live cast op. + if (liveOps.contains(op.getOperation())) + continue; + // If any of the users is not a cast op, mark the current op (and its + // input ops) as live. + if (llvm::any_of(op->getUsers(), [&](Operation *user) { + auto castOp = dyn_cast(user); + return !castOp || !isCastOpOfInterestFn(castOp); + })) + markOpLive(op); + } + + // Erase all dead cast ops. + for (UnrealizedConversionCastOp op : castOps) { + if (liveOps.contains(op)) { + // Op is alive and was not erased. Add it to the remaining cast ops. + if (remainingCastOps) remainingCastOps->push_back(op); + continue; + } + + // Op is dead. Erase it. + op->dropAllUses(); + op->erase(); + } +} + +void mlir::reconcileUnrealizedCasts( + ArrayRef castOps, + SmallVectorImpl *remainingCastOps) { + // Set of all cast ops for faster lookups. + DenseSet castOpSet; + for (UnrealizedConversionCastOp op : castOps) + castOpSet.insert(op); + reconcileUnrealizedCasts(castOpSet, remainingCastOps); +} + +void mlir::reconcileUnrealizedCasts( + const DenseSet &castOps, + SmallVectorImpl *remainingCastOps) { + reconcileUnrealizedCastsImpl( + llvm::make_range(castOps.begin(), castOps.end()), + [&](UnrealizedConversionCastOp castOp) { + return castOps.contains(castOp); + }, + remainingCastOps); +} + +static void mlir::reconcileUnrealizedCasts( + const DenseMap + &castOps, + SmallVectorImpl *remainingCastOps) { + reconcileUnrealizedCastsImpl( + castOps.keys(), + [&](UnrealizedConversionCastOp castOp) { + return castOps.contains(castOp); + }, + remainingCastOps); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir index 3573114f5e038..ac5ca321c066f 100644 --- 
a/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir +++ b/mlir/test/Conversion/ReconcileUnrealizedCasts/reconcile-unrealized-casts.mlir @@ -194,3 +194,53 @@ func.func @emptyCast() -> index { %0 = builtin.unrealized_conversion_cast to index return %0 : index } + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %0 = builtin.unrealized_conversion_cast %2 : i32 to i64 + %1 = builtin.unrealized_conversion_cast %0 : i64 to i16 + %2 = builtin.unrealized_conversion_cast %1 : i16 to i32 + "test.return"() : () -> () +} + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: %[[cast0:.*]] = builtin.unrealized_conversion_cast %[[cast2:.*]] : i32 to i64 +// CHECK-NEXT: %[[cast1:.*]] = builtin.unrealized_conversion_cast %[[cast0]] : i64 to i16 +// CHECK-NEXT: %[[cast2]] = builtin.unrealized_conversion_cast %[[cast1]] : i16 to i32 +// CHECK-NEXT: "test.user"(%[[cast2]]) : (i32) -> () +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %0 = builtin.unrealized_conversion_cast %2 : i32 to i64 + %1 = builtin.unrealized_conversion_cast %0 : i64 to i16 + %2 = builtin.unrealized_conversion_cast %1 : i16 to i32 + "test.user"(%2) : (i32) -> () + "test.return"() : () -> () +} + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %0 = builtin.unrealized_conversion_cast %0 : i32 to i32 + "test.return"() : () -> () +} + +// ----- + +// CHECK-LABEL: test.graph_region +// CHECK-NEXT: %[[c0:.*]] = arith.constant +// CHECK-NEXT: %[[cast:.*]]:2 = builtin.unrealized_conversion_cast %[[c0]], %[[cast]]#1 : i32, i32 to i32, i32 +// CHECK-NEXT: "test.user"(%[[cast]]#0) : (i32) -> () +// CHECK-NEXT: "test.return"() : () -> () +test.graph_region { + %cst = arith.constant 0 : i32 + %0, %1 = builtin.unrealized_conversion_cast %cst, %1 : i32, i32 to i32, i32 + "test.user"(%0) : (i32) -> () + "test.return"() : () -> () +} diff --git 
a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir index 25a338df8d790..01a826a638606 100644 --- a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir @@ -1,7 +1,8 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -expand-strided-metadata \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm | \ +// RUN: -convert-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir index 4c6a48d577a6c..1144a7caf36e8 100644 --- a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm | \ +// RUN: -convert-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir index dd000c6904bcb..82e63805cd027 100644 --- a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s -generate-runtime-verification \ // RUN: -test-cf-assert \ -// RUN: -convert-to-llvm | \ +// RUN: -convert-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ // RUN: mlir-runner -e main 
-entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ // RUN: FileCheck %s diff --git a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp index f5a6fc5ea2b20..e30c31693fae7 100644 --- a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp +++ b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp @@ -13,6 +13,7 @@ #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" +#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" @@ -73,6 +74,7 @@ void buildTestVulkanRunnerPipeline(OpPassManager &passManager, opt.kernelBarePtrCallConv = true; opt.kernelIntersperseSizeCallConv = true; passManager.addPass(createGpuToLLVMConversionPass(opt)); + passManager.addPass(createReconcileUnrealizedCastsPass()); } } // namespace From 50f539c858aa4d7e71d9b5d5d7da7c30ffaf4bea Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Fri, 12 Sep 2025 16:30:27 +0200 Subject: [PATCH 131/734] [MLIR] Add remark flags to mlir-opt (#156825) --- mlir/include/mlir/IR/Remarks.h | 2 +- .../include/mlir/Tools/mlir-opt/MlirOptMain.h | 44 +++++++ mlir/lib/IR/Remarks.cpp | 47 +++++++- mlir/lib/Tools/mlir-opt/CMakeLists.txt | 1 + mlir/lib/Tools/mlir-opt/MlirOptMain.cpp | 108 ++++++++++++++++-- mlir/test/Pass/remarks.mlir | 28 +++++ mlir/test/lib/Pass/CMakeLists.txt | 1 + mlir/test/lib/Pass/TestRemarksPass.cpp | 74 ++++++++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + mlir/unittests/IR/CMakeLists.txt | 2 +- mlir/unittests/IR/RemarkTest.cpp | 9 +- 11 files changed, 300 insertions(+), 18 deletions(-) create mode 100644 mlir/test/Pass/remarks.mlir create mode 100644 mlir/test/lib/Pass/TestRemarksPass.cpp diff --git a/mlir/include/mlir/IR/Remarks.h b/mlir/include/mlir/IR/Remarks.h index 
26d65472f2b1c..20e84ec83cd01 100644 --- a/mlir/include/mlir/IR/Remarks.h +++ b/mlir/include/mlir/IR/Remarks.h @@ -29,7 +29,7 @@ namespace mlir::remark { /// Define an the set of categories to accept. By default none are, the provided /// regex matches against the category names for each kind of remark. struct RemarkCategories { - std::optional passed, missed, analysis, failed; + std::optional all, passed, missed, analysis, failed; }; /// Categories describe the outcome of an transformation, not the mechanics of diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h index 94231227599c9..c3ac9d99c24bf 100644 --- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h +++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h @@ -38,6 +38,12 @@ enum class VerbosityLevel { ErrorsWarningsAndRemarks }; +using RemarkFormat = enum { + REMARK_FORMAT_STDOUT, + REMARK_FORMAT_YAML, + REMARK_FORMAT_BITSTREAM, +}; + /// Configuration options for the mlir-opt tool. /// This is intended to help building tools like mlir-opt by collecting the /// supported options. @@ -221,15 +227,53 @@ class MlirOptMainConfig { } bool shouldVerifyRoundtrip() const { return verifyRoundtripFlag; } + /// Checks if any remark filters are set. + bool shouldEmitRemarks() const { + // Remarks are emitted only when at least one filter is specified. + const bool hasFilters = + !getRemarksAllFilter().empty() || !getRemarksPassedFilter().empty() || + !getRemarksFailedFilter().empty() || + !getRemarksMissedFilter().empty() || !getRemarksAnalyseFilter().empty(); + return hasFilters; + } + /// Reproducer file generation (no crash required). StringRef getReproducerFilename() const { return generateReproducerFileFlag; } + /// Returns the remark output format. + RemarkFormat getRemarkFormat() const { return remarkFormatFlag; } + /// Returns the regex filter applied to all remark kinds. + std::string getRemarksAllFilter() const { return remarksAllFilterFlag; } + /// Returns the remark output file. 
+ std::string getRemarksOutputFile() const { return remarksOutputFileFlag; } + /// Returns the filter for passed remarks. + std::string getRemarksPassedFilter() const { return remarksPassedFilterFlag; } + /// Returns the filter for failed remarks. + std::string getRemarksFailedFilter() const { return remarksFailedFilterFlag; } + /// Returns the filter for missed remarks. + std::string getRemarksMissedFilter() const { return remarksMissedFilterFlag; } + /// Returns the filter for analysis remarks. + std::string getRemarksAnalyseFilter() const { + return remarksAnalyseFilterFlag; + } + protected: /// Allow operation with no registered dialects. /// This option is for convenience during testing only and discouraged in /// general. bool allowUnregisteredDialectsFlag = false; + /// Remark format. NOTE(review): no in-class initializer -- confirm a default. + RemarkFormat remarkFormatFlag; + /// Remark file to output to + std::string remarksOutputFileFlag = ""; + /// Remark filters + std::string remarksAllFilterFlag = ""; + std::string remarksPassedFilterFlag = ""; + std::string remarksFailedFilterFlag = ""; + std::string remarksMissedFilterFlag = ""; + std::string remarksAnalyseFilterFlag = ""; + /// Configuration for the debugging hooks. tracing::DebugConfig debugConfig; diff --git a/mlir/lib/IR/Remarks.cpp b/mlir/lib/IR/Remarks.cpp index 78c964427868f..29088bd360e23 100644 --- a/mlir/lib/IR/Remarks.cpp +++ b/mlir/lib/IR/Remarks.cpp @@ -248,17 +248,56 @@ RemarkEngine::initialize(std::unique_ptr streamer, return success(); } +/// Returns true if filter is already anchored like ^...$ +static bool isAnchored(llvm::StringRef s) { + s = s.trim(); + return s.starts_with("^") && s.ends_with("$"); +} + +/// Anchor the entire pattern so it matches the whole string. +static std::string anchorWhole(llvm::StringRef filter) { + if (isAnchored(filter)) + return filter.str(); + return (llvm::Twine("^(") + filter + ")$").str(); +} + +/// Build a combined filter from cats.all and a category-specific pattern. 
+/// If neither is present, return std::nullopt. Otherwise "(all|specific)" +/// and anchor once. Also validate before returning. +static std::optional +buildFilter(const mlir::remark::RemarkCategories &cats, + const std::optional &specific) { + llvm::SmallVector parts; + if (cats.all && !cats.all->empty()) + parts.emplace_back(*cats.all); + if (specific && !specific->empty()) + parts.emplace_back(*specific); + + if (parts.empty()) + return std::nullopt; + + std::string joined = llvm::join(parts, "|"); + std::string anchored = anchorWhole(joined); + + llvm::Regex rx(anchored); + std::string err; + if (!rx.isValid(err)) + return std::nullopt; + + return rx; +} + RemarkEngine::RemarkEngine(bool printAsEmitRemarks, const RemarkCategories &cats) : printAsEmitRemarks(printAsEmitRemarks) { if (cats.passed) - passedFilter = llvm::Regex(cats.passed.value()); + passedFilter = buildFilter(cats, cats.passed); if (cats.missed) - missFilter = llvm::Regex(cats.missed.value()); + missFilter = buildFilter(cats, cats.missed); if (cats.analysis) - analysisFilter = llvm::Regex(cats.analysis.value()); + analysisFilter = buildFilter(cats, cats.analysis); if (cats.failed) - failedFilter = llvm::Regex(cats.failed.value()); + failedFilter = buildFilter(cats, cats.failed); } llvm::LogicalResult mlir::remark::enableOptimizationRemarks( diff --git a/mlir/lib/Tools/mlir-opt/CMakeLists.txt b/mlir/lib/Tools/mlir-opt/CMakeLists.txt index f24d4c60174ee..858c9c1f97f9c 100644 --- a/mlir/lib/Tools/mlir-opt/CMakeLists.txt +++ b/mlir/lib/Tools/mlir-opt/CMakeLists.txt @@ -13,4 +13,5 @@ add_mlir_library(MLIROptLib MLIRPluginsLib MLIRSupport MLIRIRDL + MLIRRemarkStreamer ) diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp index de714d8b740af..4f3b2eda7e69b 100644 --- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -23,9 +23,11 @@ #include "mlir/IR/Diagnostics.h" #include "mlir/IR/Location.h" #include 
"mlir/IR/MLIRContext.h" +#include "mlir/IR/Remarks.h" #include "mlir/Parser/Parser.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" +#include "mlir/Remark/RemarkStreamer.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Support/Timing.h" #include "mlir/Support/ToolUtilities.h" @@ -33,6 +35,7 @@ #include "mlir/Tools/Plugins/DialectPlugin.h" #include "mlir/Tools/Plugins/PassPlugin.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/LogicalResult.h" @@ -204,6 +207,58 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig { cl::location(generateReproducerFileFlag), cl::init(""), cl::value_desc("filename")); + static cl::OptionCategory remarkCategory( + "Remark Options", + "Filter remarks by regular expression (llvm::Regex syntax)."); + + static llvm::cl::opt remarkFormat{ + "remark-format", + llvm::cl::desc("Specify the format for remark output."), + cl::location(remarkFormatFlag), + llvm::cl::value_desc("format"), + llvm::cl::init(REMARK_FORMAT_STDOUT), + llvm::cl::values( + clEnumValN(REMARK_FORMAT_STDOUT, "emitRemark", + "Print as emitRemark to command-line"), + clEnumValN(REMARK_FORMAT_YAML, "yaml", "Print yaml file"), + clEnumValN(REMARK_FORMAT_BITSTREAM, "bitstream", + "Print bitstream file")), + llvm::cl::cat(remarkCategory)}; + + static cl::opt remarksAll( + "remarks-filter", + cl::desc("Show all remarks: passed, missed, failed, analysis"), + cl::location(remarksAllFilterFlag), cl::init(""), + cl::cat(remarkCategory)); + + static cl::opt remarksFile( + "remarks-output-file", + cl::desc( + "Output file for yaml and bitstream remark formats. 
Default is " + "mlir-remarks.yaml or mlir-remarks.bitstream"), + cl::location(remarksOutputFileFlag), cl::init(""), + cl::cat(remarkCategory)); + + static cl::opt remarksPassed( + "remarks-filter-passed", cl::desc("Show passed remarks"), + cl::location(remarksPassedFilterFlag), cl::init(""), + cl::cat(remarkCategory)); + + static cl::opt remarksFailed( + "remarks-filter-failed", cl::desc("Show failed remarks"), + cl::location(remarksFailedFilterFlag), cl::init(""), + cl::cat(remarkCategory)); + + static cl::opt remarksMissed( + "remarks-filter-missed", cl::desc("Show missed remarks"), + cl::location(remarksMissedFilterFlag), cl::init(""), + cl::cat(remarkCategory)); + + static cl::opt remarksAnalyse( + "remarks-filter-analyse", cl::desc("Show analysis remarks"), + cl::location(remarksAnalyseFilterFlag), cl::init(""), + cl::cat(remarkCategory)); + /// Set the callback to load a pass plugin. passPlugins.setCallback([&](const std::string &pluginPath) { auto plugin = PassPlugin::load(pluginPath); @@ -241,23 +296,23 @@ class DiagnosticFilter : public ScopedDiagnosticHandler { setHandler([verbosityLevel, showNotes](Diagnostic &diag) { auto severity = diag.getSeverity(); switch (severity) { - case DiagnosticSeverity::Error: + case mlir::DiagnosticSeverity::Error: // failure indicates that the error is not handled by the filter and // goes through to the default handler. Therefore, the error can be // successfully printed. 
return failure(); - case DiagnosticSeverity::Warning: + case mlir::DiagnosticSeverity::Warning: if (verbosityLevel == VerbosityLevel::ErrorsOnly) return success(); else return failure(); - case DiagnosticSeverity::Remark: + case mlir::DiagnosticSeverity::Remark: if (verbosityLevel == VerbosityLevel::ErrorsOnly || verbosityLevel == VerbosityLevel::ErrorsAndWarnings) return success(); else return failure(); - case DiagnosticSeverity::Note: + case mlir::DiagnosticSeverity::Note: if (showNotes) return failure(); else @@ -462,6 +517,41 @@ performActions(raw_ostream &os, context->enableMultithreading(wasThreadingEnabled); + remark::RemarkCategories cats{ + config.getRemarksAllFilter(), config.getRemarksPassedFilter(), + config.getRemarksMissedFilter(), config.getRemarksAnalyseFilter(), + config.getRemarksFailedFilter()}; + + mlir::MLIRContext &ctx = *context; + + switch (config.getRemarkFormat()) { + case REMARK_FORMAT_STDOUT: + if (failed(mlir::remark::enableOptimizationRemarks( + ctx, nullptr, cats, true /*printAsEmitRemarks*/))) + return failure(); + break; + + case REMARK_FORMAT_YAML: { + std::string file = config.getRemarksOutputFile().empty() + ? "mlir-remarks.yaml" + : config.getRemarksOutputFile(); + if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer( + ctx, file, llvm::remarks::Format::YAML, cats))) + return failure(); + break; + } + + case REMARK_FORMAT_BITSTREAM: { + std::string file = config.getRemarksOutputFile().empty() + ? "mlir-remarks.bitstream" + : config.getRemarksOutputFile(); + if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer( + ctx, file, llvm::remarks::Format::Bitstream, cats))) + return failure(); + break; + } + } + // Prepare the pass manager, applying command-line and reproducer options. 
PassManager pm(op.get()->getName(), PassManager::Nesting::Implicit); pm.enableVerifier(config.shouldVerifyPasses()); @@ -523,8 +613,8 @@ processBuffer(raw_ostream &os, std::unique_ptr ownedBuffer, SMLoc()); sourceMgr->AddNewSourceBuffer(std::move(ownedBuffer), SMLoc()); - // Create a context just for the current buffer. Disable threading on creation - // since we'll inject the thread-pool separately. + // Create a context just for the current buffer. Disable threading on + // creation since we'll inject the thread-pool separately. MLIRContext context(registry, MLIRContext::Threading::DISABLED); if (threadPool) context.setThreadPool(*threadPool); @@ -669,9 +759,9 @@ LogicalResult mlir::MlirOptMain(int argc, char **argv, if (config.shouldListPasses()) return printRegisteredPassesAndReturn(); - // When reading from stdin and the input is a tty, it is often a user mistake - // and the process "appears to be stuck". Print a message to let the user know - // about it! + // When reading from stdin and the input is a tty, it is often a user + // mistake and the process "appears to be stuck". Print a message to let the + // user know about it! 
if (inputFilename == "-" && sys::Process::FileDescriptorIsDisplayed(fileno(stdin))) llvm::errs() << "(processing input from stdin now, hit ctrl-c/ctrl-d to " diff --git a/mlir/test/Pass/remarks.mlir b/mlir/test/Pass/remarks.mlir new file mode 100644 index 0000000000000..8aa04e3c98d80 --- /dev/null +++ b/mlir/test/Pass/remarks.mlir @@ -0,0 +1,28 @@ +// RUN: mlir-opt %s --test-remark --remarks-filter-passed="category-1-passed" 2>&1 | FileCheck %s -check-prefix=CHECK-PASSED +// RUN: mlir-opt %s --test-remark --remarks-filter-missed="a-category-1-missed" 2>&1 | FileCheck %s -check-prefix=CHECK-MISSED +// RUN: mlir-opt %s --test-remark --remarks-filter-failed="category-2-failed" 2>&1 | FileCheck %s -check-prefix=CHECK-FAILED +// RUN: mlir-opt %s --test-remark --remarks-filter-analyse="category-2-analysis" 2>&1 | FileCheck %s -check-prefix=CHECK-ANALYSIS +// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" 2>&1 | FileCheck %s -check-prefix=CHECK-ALL +// RUN: mlir-opt %s --test-remark --remarks-filter="category-1.*" 2>&1 | FileCheck %s -check-prefix=CHECK-ALL1 +module @foo { + "test.op"() : () -> () + +} + + +// CHECK-PASSED: remarks.mlir:8:3: remark: [Passed] test-remark | Category:category-1-passed | Reason="because we are testing the remark pipeline", Remark="This is a test passed remark", Suggestion="try using the remark pipeline feature" +// CHECK-MISSED:remarks.mlir:8:3: remark: [Missed] test-remark | Category:a-category-1-missed | Reason="because we are testing the remark pipeline", Remark="This is a test missed remark", Suggestion="try using the remark pipeline feature" +// CHECK-FAILED: remarks.mlir:8:3: remark: [Failure] test-remark | Category:category-2-failed | Reason="because we are testing the remark pipeline", Remark="This is a test failed remark", Suggestion="try using the remark pipeline feature" +// CHECK-ANALYSIS: remarks.mlir:8:3: remark: [Analysis] test-remark | Category:category-2-analysis | Remark="This is a test analysis remark" + + +// 
CHECK-ALL: remarks.mlir:8:3: remark: [Passed] test-remark | Category:category-1-passed | Reason="because we are testing the remark pipeline", Remark="This is a test passed remark", Suggestion="try using the remark pipeline feature" +// CHECK-ALL: remarks.mlir:8:3: remark: [Failure] test-remark | Category:category-2-failed | Reason="because we are testing the remark pipeline", Remark="This is a test failed remark", Suggestion="try using the remark pipeline feature" +// CHECK-ALL: remarks.mlir:8:3: remark: [Analysis] test-remark | Category:category-2-analysis | Remark="This is a test analysis remark" + +// CHECK-ALL1: remarks.mlir:8:3: remark: [Passed] test-remark | Category:category-1-passed | Reason="because we are testing the remark pipeline", Remark="This is a test passed remark", Suggestion="try using the remark pipeline feature" +// CHECK-ALL1-NOT: remarks.mlir:8:3: remark: [Missed] +// CHECK-ALL1-NOT: remarks.mlir:8:3: remark: [Failure] +// CHECK-ALL1-NOT: remarks.mlir:8:3: remark: [Analysis] + + diff --git a/mlir/test/lib/Pass/CMakeLists.txt b/mlir/test/lib/Pass/CMakeLists.txt index ab52f621c517e..04c91635def85 100644 --- a/mlir/test/lib/Pass/CMakeLists.txt +++ b/mlir/test/lib/Pass/CMakeLists.txt @@ -4,6 +4,7 @@ add_mlir_library(MLIRTestPass TestConvertToSPIRVPass.cpp TestDynamicPipeline.cpp TestPassManager.cpp + TestRemarksPass.cpp TestSPIRVCPURunnerPipeline.cpp TestVulkanRunnerPipeline.cpp diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp new file mode 100644 index 0000000000000..3b25686b3dc14 --- /dev/null +++ b/mlir/test/lib/Pass/TestRemarksPass.cpp @@ -0,0 +1,74 @@ +//===------ TestRemarksPass.cpp --- remark pipeline test pass ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to test the remark pipeline feature. +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Remarks.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/WalkResult.h" + +using namespace mlir; + +namespace { + +class TestRemarkPass : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRemarkPass) + + StringRef getArgument() const final { return "test-remark"; } + StringRef getDescription() const final { + return "Tests the remark pipeline feature"; + } + + TestRemarkPass() = default; + + void runOnOperation() override { + + getOperation()->walk([](Operation *op) { + if (isa(op)) + return WalkResult::advance(); + Location loc = op->getLoc(); + mlir::remark::missed(loc, remark::RemarkOpts::name("test-remark") + .category("a-category-1-missed")) + << remark::add("This is a test missed remark") + << remark::reason("because we are testing the remark pipeline") + << remark::suggest("try using the remark pipeline feature"); + + mlir::remark::passed( + loc, + remark::RemarkOpts::name("test-remark").category("category-1-passed")) + << remark::add("This is a test passed remark") + << remark::reason("because we are testing the remark pipeline") + << remark::suggest("try using the remark pipeline feature"); + + mlir::remark::failed( + loc, + remark::RemarkOpts::name("test-remark").category("category-2-failed")) + << remark::add("This is a test failed remark") + << remark::reason("because we are testing the remark pipeline") + << remark::suggest("try using the remark pipeline feature"); + + mlir::remark::analysis(loc, remark::RemarkOpts::name("test-remark") + .category("category-2-analysis")) + << remark::add("This is 
a test analysis remark"); + return WalkResult::advance(); + }); + } +}; +} // namespace + +namespace mlir { +namespace test { +void registerTestRemarkPass() { PassRegistration(); } +} // namespace test +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 7b992b4ee029b..e4620c009af8c 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -97,6 +97,7 @@ void registerTestDiagnosticsPass(); void registerTestDiagnosticsMetadataPass(); void registerTestDominancePass(); void registerTestDynamicPipelinePass(); +void registerTestRemarkPass(); void registerTestEmulateNarrowTypePass(); void registerTestFooAnalysisPass(); void registerTestComposeSubView(); @@ -243,6 +244,7 @@ void registerTestPasses() { mlir::test::registerTestDiagnosticsMetadataPass(); mlir::test::registerTestDominancePass(); mlir::test::registerTestDynamicPipelinePass(); + mlir::test::registerTestRemarkPass(); mlir::test::registerTestEmulateNarrowTypePass(); mlir::test::registerTestFooAnalysisPass(); mlir::test::registerTestComposeSubView(); diff --git a/mlir/unittests/IR/CMakeLists.txt b/mlir/unittests/IR/CMakeLists.txt index 75cd2d65ef5a1..dd3b110dcd295 100644 --- a/mlir/unittests/IR/CMakeLists.txt +++ b/mlir/unittests/IR/CMakeLists.txt @@ -14,7 +14,7 @@ add_mlir_unittest(MLIRIRTests MemrefLayoutTest.cpp OperationSupportTest.cpp PatternMatchTest.cpp - RemarkTest.cpp + RemarkTest.cpp ShapedTypeTest.cpp SymbolTableTest.cpp TypeTest.cpp diff --git a/mlir/unittests/IR/RemarkTest.cpp b/mlir/unittests/IR/RemarkTest.cpp index 65e1e08b83838..5bfca255c22ca 100644 --- a/mlir/unittests/IR/RemarkTest.cpp +++ b/mlir/unittests/IR/RemarkTest.cpp @@ -48,7 +48,8 @@ TEST(Remark, TestOutputOptimizationRemark) { context.printStackTraceOnDiagnostic(true); // Setup the remark engine - mlir::remark::RemarkCategories cats{/*passed=*/categoryVectorizer, + mlir::remark::RemarkCategories cats{/*all=*/"", + /*passed=*/categoryVectorizer, 
/*missed=*/categoryUnroll, /*analysis=*/categoryRegister, /*failed=*/categoryInliner}; @@ -197,7 +198,8 @@ TEST(Remark, TestOutputOptimizationRemarkDiagnostic) { }); // Setup the remark engine - mlir::remark::RemarkCategories cats{/*passed=*/categoryVectorizer, + mlir::remark::RemarkCategories cats{/*all=*/"", + /*passed=*/categoryVectorizer, /*missed=*/categoryUnroll, /*analysis=*/categoryRegister, /*failed=*/categoryUnroll}; @@ -278,7 +280,8 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) { Location loc = UnknownLoc::get(&context); // Setup the remark engine - mlir::remark::RemarkCategories cats{/*passed=*/categoryLoopunroll, + mlir::remark::RemarkCategories cats{/*all=*/"", + /*passed=*/categoryLoopunroll, /*missed=*/std::nullopt, /*analysis=*/std::nullopt, /*failed=*/categoryLoopunroll}; From 179f01b800e29b38f7d97c043ff331d4f202a12a Mon Sep 17 00:00:00 2001 From: Brandon Kirincich <44515121+BrandonKi@users.noreply.github.com> Date: Fri, 12 Sep 2025 10:48:27 -0400 Subject: [PATCH 132/734] Fix MLIR Transform Tutorial Doc (#155285) Fixes a small issue I noticed while reading through the tutorial. --- mlir/docs/Tutorials/transform/Ch0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/docs/Tutorials/transform/Ch0.md b/mlir/docs/Tutorials/transform/Ch0.md index dc4b753f98caa..0d7a70364742d 100644 --- a/mlir/docs/Tutorials/transform/Ch0.md +++ b/mlir/docs/Tutorials/transform/Ch0.md @@ -134,7 +134,7 @@ Furthermore, the operation now contains a region that explicitly specifies the m ## “Loop” Fusion -Since the region of the `linalg.generic` operation can contain arbitrarily many operations, we can use it to express “fusion” of the implicit loops by simply having more operations chained in the region. 
For example, the common machine learning rectified linear unit layer (ReLU), which can be defined as `relu(x) = max(0, x)`, can be defined be expressed using the “compare-and-select” idiom in one `linalg.generic` operation, without the temporary buffer for the comparison result and without repeating the outer operation: +Since the region of the `linalg.generic` operation can contain arbitrarily many operations, we can use it to express “fusion” of the implicit loops by simply having more operations chained in the region. For example, the common machine learning rectified linear unit layer (ReLU), which can be defined as `relu(x) = max(0, x)`, can be expressed using the “compare-and-select” idiom in one `linalg.generic` operation, without the temporary buffer for the comparison result and without repeating the outer operation: ```mlir linalg.generic { From 889c289a409eea443cc5eba54d68cc6a3161be07 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 12 Sep 2025 07:50:44 -0700 Subject: [PATCH 133/734] [SimplfyCFG] Set `MD_prof` for `select` used for certain conditional simplifications (#154426) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There’s a pattern where a branch is conditioned on a conjunction or disjunction that ends up being modeled as a `select`​ where the first operand is set to `true`​ or the second to `false`​. If the branch has known branch weights, they can be copied to the `select`​. This is worth doing in case later the `select`​ gets transformed to something else (i.e. if we know the profile, we should propagate it). 
Issue #147390 --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 32 +++++- .../SimplifyCFG/branch-fold-threshold.ll | 29 ++++-- .../Transforms/SimplifyCFG/branch-fold.ll | 19 +++- .../SimplifyCFG/preserve-branchweights.ll | 97 ++++++++++--------- 4 files changed, 115 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 850e57e6b0b14..e5517409ded70 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -332,6 +332,16 @@ class SimplifyCFGOpt { } }; +// we synthesize a || b as select a, true, b +// we synthesize a && b as select a, b, false +// this function determines if SI is playing one of those roles. +bool isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) { + return ((isa(SI->getTrueValue()) && + (dyn_cast(SI->getTrueValue())->isOne())) || + (isa(SI->getFalseValue()) && + (dyn_cast(SI->getFalseValue())->isNullValue()))); +} + } // end anonymous namespace /// Return true if all the PHI nodes in the basic block \p BB @@ -4033,6 +4043,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, // Try to update branch weights. 
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + SmallVector MDWeights; if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight)) { SmallVector NewWeights; @@ -4063,7 +4074,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, // Halve the weights if any of them cannot fit in an uint32_t fitWeights(NewWeights); - SmallVector MDWeights(NewWeights.begin(), NewWeights.end()); + append_range(MDWeights, NewWeights); setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false); // TODO: If BB is reachable from all paths through PredBlock, then we @@ -4100,6 +4111,13 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, Value *BICond = VMap[BI->getCondition()]; PBI->setCondition( createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond")); + if (!ProfcheckDisableMetadataFixes) + if (auto *SI = dyn_cast(PBI->getCondition())) + if (!MDWeights.empty()) { + assert(isSelectInRoleOfConjunctionOrDisjunction(SI)); + setBranchWeights(SI, MDWeights[0], MDWeights[1], + /*IsExpected=*/false); + } ++NumFoldBranchToCommonDest; return true; @@ -4812,6 +4830,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, fitWeights(NewWeights); setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false); + // Cond may be a select instruction with the first operand set to "true", or + // the second to "false" (see how createLogicalOp works for `and` and `or`) + if (!ProfcheckDisableMetadataFixes) + if (auto *SI = dyn_cast(Cond)) { + assert(isSelectInRoleOfConjunctionOrDisjunction(SI)); + // The select is predicated on PBICond + assert(dyn_cast(SI)->getCondition() == PBICond); + // The corresponding probabilities are what was referred to above as + // PredCommon and PredOther. + setBranchWeights(SI, PredCommon, PredOther, + /*IsExpected=*/false); + } } // OtherDest may have phi nodes. 
If so, add an entry from PBI's diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll index 4384847ce156b..71ad069fb8d06 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 ; RUN: opt %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefixes=NORMAL,BASELINE ; RUN: opt %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefixes=NORMAL,AGGRESSIVE ; RUN: opt %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefixes=WAYAGGRESSIVE @@ -11,12 +11,12 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) { ; BASELINE-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], ptr [[INPUT:%.*]]) { ; BASELINE-NEXT: [[ENTRY:.*]]: ; BASELINE-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D]], 3 -; BASELINE-NEXT: br i1 [[CMP]], label %[[COND_END:.*]], label %[[LOR_LHS_FALSE:.*]] +; BASELINE-NEXT: br i1 [[CMP]], label %[[COND_END:.*]], label %[[LOR_LHS_FALSE:.*]], !prof [[PROF0:![0-9]+]] ; BASELINE: [[LOR_LHS_FALSE]]: ; BASELINE-NEXT: [[MUL:%.*]] = shl i32 [[C]], 1 ; BASELINE-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[A]] ; BASELINE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B]] -; BASELINE-NEXT: br i1 [[CMP1]], label %[[COND_FALSE:.*]], label %[[COND_END]] +; BASELINE-NEXT: br i1 [[CMP1]], label %[[COND_FALSE:.*]], label %[[COND_END]], !prof [[PROF1:![0-9]+]] ; BASELINE: [[COND_FALSE]]: ; BASELINE-NEXT: [[TMP0:%.*]] = load i32, ptr [[INPUT]], align 4 ; BASELINE-NEXT: br label %[[COND_END]] @@ -31,8 +31,8 @@ define i32 
@foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) { ; AGGRESSIVE-NEXT: [[MUL:%.*]] = shl i32 [[C]], 1 ; AGGRESSIVE-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[A]] ; AGGRESSIVE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B]] -; AGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false -; AGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]] +; AGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false, !prof [[PROF0:![0-9]+]] +; AGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]], !prof [[PROF0]] ; AGGRESSIVE: [[COND_FALSE]]: ; AGGRESSIVE-NEXT: [[TMP0:%.*]] = load i32, ptr [[INPUT]], align 4 ; AGGRESSIVE-NEXT: br label %[[COND_END]] @@ -47,8 +47,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) { ; WAYAGGRESSIVE-NEXT: [[MUL:%.*]] = shl i32 [[C]], 1 ; WAYAGGRESSIVE-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[A]] ; WAYAGGRESSIVE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B]] -; WAYAGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false -; WAYAGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]] +; WAYAGGRESSIVE-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false, !prof [[PROF0:![0-9]+]] +; WAYAGGRESSIVE-NEXT: br i1 [[OR_COND]], label %[[COND_FALSE:.*]], label %[[COND_END:.*]], !prof [[PROF0]] ; WAYAGGRESSIVE: [[COND_FALSE]]: ; WAYAGGRESSIVE-NEXT: [[TMP0:%.*]] = load i32, ptr [[INPUT]], align 4 ; WAYAGGRESSIVE-NEXT: br label %[[COND_END]] @@ -58,13 +58,13 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, ptr %input) { ; entry: %cmp = icmp sgt i32 %d, 3 - br i1 %cmp, label %cond.end, label %lor.lhs.false + br i1 %cmp, label %cond.end, label %lor.lhs.false, !prof !0 lor.lhs.false: %mul = shl i32 %c, 1 %add = add nsw i32 %mul, %a %cmp1 = icmp slt i32 %add, %b - br i1 %cmp1, label %cond.false, label %cond.end + br i1 %cmp1, label %cond.false, label %cond.end, !prof !1 cond.false: %0 = load i32, 
ptr %input, align 4 @@ -160,3 +160,14 @@ cond.end: %cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ],[ 0, %pred_a ],[ 0, %pred_b ] ret i32 %cond } + +!0 = !{!"branch_weights", i32 7, i32 11} +!1 = !{!"branch_weights", i32 13, i32 5} +;. +; BASELINE: [[PROF0]] = !{!"branch_weights", i32 7, i32 11} +; BASELINE: [[PROF1]] = !{!"branch_weights", i32 13, i32 5} +;. +; AGGRESSIVE: [[PROF0]] = !{!"branch_weights", i32 143, i32 181} +;. +; WAYAGGRESSIVE: [[PROF0]] = !{!"branch_weights", i32 143, i32 181} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll index 2f5fb4f33013d..8e7b91ea172be 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll @@ -1,12 +1,12 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s define void @test(ptr %P, ptr %Q, i1 %A, i1 %B) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_NOT:%.*]] = xor i1 [[A:%.*]], true -; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A_NOT]], i1 true, i1 [[B:%.*]] -; CHECK-NEXT: br i1 [[BRMERGE]], label [[B:%.*]], label [[COMMON_RET:%.*]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A_NOT]], i1 true, i1 [[B:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[BRMERGE]], label [[B:%.*]], label [[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: b: @@ -15,9 +15,9 @@ define void @test(ptr %P, ptr %Q, i1 %A, i1 %B) { ; entry: - br i1 %A, label %a, label %b + br i1 %A, label %a, label %b, !prof !0 a: - br i1 %B, label %b, label %c + br i1 %B, label %b, label %c, !prof !1 b: store i32 123, ptr %P ret void @@ -146,3 +146,12 @@ Succ: } declare void @dummy() + +!0 = !{!"branch_weights", i32 3, i32 7} +!1 = 
!{!"branch_weights", i32 11, i32 4} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp memory(read) uwtable } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 7, i32 3} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 138, i32 12} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll index ba542459a396c..0624f72d7a142 100644 --- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll +++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll @@ -11,8 +11,8 @@ define void @test1(i1 %a, i1 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_NOT:%.*]] = xor i1 [[A:%.*]], true ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A_NOT]], i1 [[C]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A_NOT]], i1 [[C]], i1 false, !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF0]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -42,8 +42,8 @@ define void @test2(i1 %a, i1 %b) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false, !prof [[PROF1:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF1]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -73,8 +73,8 @@ define void @test3(i1 %a, i1 %b) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2:![0-9]+]] +; 
CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false, !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -104,7 +104,7 @@ define void @test4(i1 %a, i1 %b) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false, !prof [[PROF2]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2]] ; CHECK: common.ret: ; CHECK-NEXT: ret void @@ -237,8 +237,8 @@ define void @test1_swap(i1 %a, i1 %b) { ; CHECK-LABEL: @test1_swap( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF5]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -268,8 +268,8 @@ define void @test7(i1 %a, i1 %b) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false -; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]] -; CHECK-NEXT: br i1 [[BRMERGE]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF6:![0-9]+]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[C]], !prof [[PROF6:![0-9]+]] +; CHECK-NEXT: br i1 [[BRMERGE]], label [[Y:%.*]], label [[Z:%.*]], !prof [[PROF7:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -300,7 +300,7 @@ define void @test8(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LT:%.*]] = icmp slt i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: br i1 [[LT]], label 
[[A:%.*]], label [[B:%.*]], !prof [[PROF7:![0-9]+]] +; CHECK-NEXT: br i1 [[LT]], label [[A:%.*]], label [[B:%.*]], !prof [[PROF8:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: a: @@ -339,7 +339,7 @@ define i1 @test9(i32 %x, i32 %y) nounwind { ; CHECK-NEXT: i32 1, label [[END:%.*]] ; CHECK-NEXT: i32 2, label [[END]] ; CHECK-NEXT: i32 92, label [[END]] -; CHECK-NEXT: ], !prof [[PROF8:![0-9]+]] +; CHECK-NEXT: ], !prof [[PROF9:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i1 [ [[RETA:%.*]], [[A]] ], [ [[RET:%.*]], [[END]] ] ; CHECK-NEXT: ret i1 [[COMMON_RET_OP]] @@ -381,7 +381,7 @@ define void @test10(i32 %x) nounwind readnone ssp noredzone { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -1 ; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3 -; CHECK-NEXT: br i1 [[SWITCH]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]], !prof [[PROF9:![0-9]+]] +; CHECK-NEXT: br i1 [[SWITCH]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]], !prof [[PROF10:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: lor.rhs: @@ -413,7 +413,7 @@ define void @test11(i32 %x) nounwind { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[I:%.*]] = shl i32 [[X:%.*]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I]], 24 -; CHECK-NEXT: br i1 [[COND]], label [[C:%.*]], label [[A:%.*]], !prof [[PROF10:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[C:%.*]], label [[A:%.*]], !prof [[PROF11:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: a: @@ -500,8 +500,8 @@ define void @test14(ptr %old, i32 %final) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[BIT_0]], 0 ; CHECK-NEXT: [[V3:%.*]] = load i32, ptr @max_regno, align 4 ; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[I_1]], [[V3]] -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[CMP4]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_EXIT:%.*]], label [[FOR_INC]], !prof [[PROF11:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[CMP4]], 
!prof [[PROF12:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_EXIT:%.*]], label [[FOR_INC]], !prof [[PROF12]] ; CHECK: for.inc: ; CHECK-NEXT: [[SHL]] = shl i32 [[BIT_0]], 1 ; CHECK-NEXT: [[INC19]] = add nsw i32 [[I_1]], 1 @@ -534,7 +534,7 @@ define i32 @HoistThenElseCodeToIf(i32 %n) { ; CHECK-LABEL: @HoistThenElseCodeToIf( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[N:%.*]], 0 -; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 1, i32 234, !prof [[PROF12:![0-9]+]] +; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 1, i32 234, !prof [[PROF6]] ; CHECK-NEXT: ret i32 [[DOT]] ; entry: @@ -557,8 +557,8 @@ return: define i32 @SimplifyCondBranchToCondBranch(i1 %cmpa, i1 %cmpb) { ; CHECK-LABEL: @SimplifyCondBranchToCondBranch( ; CHECK-NEXT: block1: -; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA:%.*]], i1 true, i1 [[CMPB:%.*]] -; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA]], i32 0, i32 2, !prof [[PROF13:![0-9]+]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA:%.*]], i1 true, i1 [[CMPB:%.*]], !prof [[PROF13:![0-9]+]] +; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA]], i32 0, i32 2, !prof [[PROF13]] ; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof [[PROF14:![0-9]+]] ; CHECK-NEXT: ret i32 [[OUTVAL]] ; @@ -584,8 +584,8 @@ define i32 @SimplifyCondBranchToCondBranchSwap(i1 %cmpa, i1 %cmpb) { ; CHECK-NEXT: block1: ; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 [[CMPA:%.*]], true ; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 [[CMPB:%.*]], true -; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]] -; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof [[PROF15:![0-9]+]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]], !prof [[PROF15:![0-9]+]] +; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof [[PROF15]] ; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof [[PROF16:![0-9]+]] ; 
CHECK-NEXT: ret i32 [[OUTVAL]] ; @@ -609,7 +609,7 @@ define i32 @SimplifyCondBranchToCondBranchSwapMissingWeight(i1 %cmpa, i1 %cmpb) ; CHECK-NEXT: block1: ; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 [[CMPA:%.*]], true ; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 [[CMPB:%.*]], true -; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[CMPA_NOT]], i1 true, i1 [[CMPB_NOT]], !prof [[PROF15]] ; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof [[PROF15]] ; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof [[PROF17:![0-9]+]] ; CHECK-NEXT: ret i32 [[OUTVAL]] @@ -701,8 +701,8 @@ define void @or_icmps_probably_not_harmful(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF20:![0-9]+]], !unpredictable [[META21:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF20:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF20]], !unpredictable [[META21:![0-9]+]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -733,8 +733,8 @@ define void @or_icmps_not_that_harmful(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF22:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof 
[[PROF22:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF22]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -765,8 +765,8 @@ define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF23:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF23:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF23]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -796,8 +796,8 @@ define void @or_icmps_useful(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF24:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF24:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF24]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -827,7 +827,7 @@ define void @or_icmps_useful_inverted(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]] +; CHECK-NEXT: 
[[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]], !prof [[PROF24]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof [[PROF24]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 @@ -956,8 +956,8 @@ define void @and_icmps_not_that_harmful(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25:![0-9]+]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF25:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -988,7 +988,7 @@ define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF25]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF25]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 @@ -1019,8 +1019,8 @@ define void @and_icmps_useful(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26:![0-9]+]] +; CHECK-NEXT: 
[[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -1050,7 +1050,7 @@ define void @and_icmps_useful_inverted(i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false, !prof [[PROF26]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof [[PROF26]] ; CHECK: false: ; CHECK-NEXT: store i8 42, ptr [[P:%.*]], align 1 @@ -1097,23 +1097,26 @@ exit: !20 = !{} ; . +; . +; . +;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind uwtable } ; CHECK: attributes #[[ATTR1]] = { nounwind } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { noredzone nounwind ssp memory(none) } -; . +;. 
; CHECK: [[PROF0]] = !{!"branch_weights", i32 5, i32 11} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 5} ; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 3} ; CHECK: [[PROF3]] = !{!"branch_weights", i32 7, i32 1, i32 2} ; CHECK: [[PROF4]] = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35} ; CHECK: [[PROF5]] = !{!"branch_weights", i32 11, i32 5} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 17, i32 15} -; CHECK: [[PROF7]] = !{!"branch_weights", i32 9, i32 7} -; CHECK: [[PROF8]] = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17} -; CHECK: [[PROF9]] = !{!"branch_weights", i32 24, i32 33} -; CHECK: [[PROF10]] = !{!"branch_weights", i32 8, i32 33} -; CHECK: [[PROF11]] = !{!"branch_weights", i32 112017436, i32 -735157296} -; CHECK: [[PROF12]] = !{!"branch_weights", i32 3, i32 5} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 3, i32 5} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 17, i32 15} +; CHECK: [[PROF8]] = !{!"branch_weights", i32 9, i32 7} +; CHECK: [[PROF9]] = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17} +; CHECK: [[PROF10]] = !{!"branch_weights", i32 24, i32 33} +; CHECK: [[PROF11]] = !{!"branch_weights", i32 8, i32 33} +; CHECK: [[PROF12]] = !{!"branch_weights", i32 112017436, i32 -735157296} ; CHECK: [[PROF13]] = !{!"branch_weights", i32 2, i32 3} ; CHECK: [[PROF14]] = !{!"branch_weights", i32 34, i32 21} ; CHECK: [[PROF15]] = !{!"branch_weights", i32 3, i32 2} @@ -1128,4 +1131,4 @@ exit: ; CHECK: [[PROF24]] = !{!"branch_weights", i32 101, i32 99} ; CHECK: [[PROF25]] = !{!"branch_weights", i32 1, i32 197} ; CHECK: [[PROF26]] = !{!"branch_weights", i32 99, i32 101} -; . +;. From b22a97d9d28ad25ecf7c28c0b11e9bfca52923a5 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 12 Sep 2025 07:51:49 -0700 Subject: [PATCH 134/734] [Support] Merge two implementations of addRangeElementsImpl (NFC) (#158212) This patch uses "constexpr if" to merge two implementations of addRangeElementsImpl. 
While the line count does not change much, the "if" condition should be a lot more readable than in std::enable_if. --- llvm/include/llvm/Support/HashBuilder.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h index 097110874400d..17fbc3f96ed04 100644 --- a/llvm/include/llvm/Support/HashBuilder.h +++ b/llvm/include/llvm/Support/HashBuilder.h @@ -366,18 +366,16 @@ class HashBuilder : public HashBuilderBase { HashBuilder &addRangeElementsImpl(ForwardIteratorT First, ForwardIteratorT Last, std::forward_iterator_tag) { - for (auto It = First; It != Last; ++It) - add(*It); - return *this; - } - - template - std::enable_if_t::value && - Endianness == llvm::endianness::native, - HashBuilder &> - addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { - this->update(ArrayRef(reinterpret_cast(First), - (Last - First) * sizeof(T))); + using T = typename std::iterator_traits::value_type; + if constexpr (std::is_pointer_v && + hashbuilder_detail::IsHashableData::value && + Endianness == llvm::endianness::native) { + this->update(ArrayRef(reinterpret_cast(First), + (Last - First) * sizeof(T))); + } else { + for (auto It = First; It != Last; ++It) + add(*It); + } return *this; } }; From 0d7f66d49cd80c5c688433427c74ae9b32e818f7 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 12 Sep 2025 07:51:56 -0700 Subject: [PATCH 135/734] [Sema] Use llvm::is_contained instead of llvm::all_of (NFC) (#158213) The code in question uses llvm::all_of and llvm::identity to see if every pointer is nonnull: Ptr1 && Ptr2 && Ptr3 && ... This patch simplifies the expression by checking for the absence of nullptr with !llvm::is_contained. 
--- clang/lib/Sema/HeuristicResolver.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Sema/HeuristicResolver.cpp b/clang/lib/Sema/HeuristicResolver.cpp index 6d79f3feeaace..29840a430292e 100644 --- a/clang/lib/Sema/HeuristicResolver.cpp +++ b/clang/lib/Sema/HeuristicResolver.cpp @@ -13,7 +13,6 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" -#include "llvm/ADT/identity.h" namespace clang { @@ -562,7 +561,7 @@ HeuristicResolverImpl::getFunctionProtoTypeLoc(const Expr *Fn) { // In some edge cases the AST can contain a "trivial" FunctionProtoTypeLoc // which has null parameters. Avoid these as they don't contain useful // information. - if (llvm::all_of(F.getParams(), llvm::identity())) + if (!llvm::is_contained(F.getParams(), nullptr)) return F; } From 43906cb0867383b32781b7647f5b1bec7109ddfe Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 12 Sep 2025 07:52:04 -0700 Subject: [PATCH 136/734] [llvm] Proofread ConvergentOperations.rst (#158214) --- llvm/docs/ConvergentOperations.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/docs/ConvergentOperations.rst b/llvm/docs/ConvergentOperations.rst index 5081efffc89ac..cdd3e89aba1f4 100644 --- a/llvm/docs/ConvergentOperations.rst +++ b/llvm/docs/ConvergentOperations.rst @@ -13,7 +13,7 @@ Some parallel execution environments execute threads in groups that allow efficient communication within the group using special primitives called *convergent* operations. The outcome of a convergent operation is sensitive to the set of threads that executes it "together", i.e., convergently. When control -flow :ref:`diverges `, i.e. threads of the same +flow :ref:`diverges `, i.e., threads of the same group follow different paths through the CFG, not all threads of the group may be available to participate in this communication. 
This is the defining characteristic that @@ -41,7 +41,7 @@ In structured programming languages, there is often an intuitive and unambiguous way of determining the threads that are expected to communicate. However, this is not always the case even in structured programming languages, and the intuition breaks down entirely in unstructured control flow. This -document describes the formal semantics in LLVM, i.e. how to determine the set +document describes the formal semantics in LLVM, i.e., how to determine the set of communicating threads for convergent operations. The definitions in this document leave many details open, such as how groups of @@ -449,15 +449,15 @@ Consider the following example: // E } -In this program, the call to convergent_op() is lexically "inside" the ``for`` +In this program, the call to ``convergent_op()`` is lexically "inside" the ``for`` loop. But when translated to LLVM IR, the basic block B is an exiting block ending in a divergent branch, and the basic block C is an exit of the loop. -Thus, the call to convergent_op() is outside the loop. This causes a mismatch +Thus, the call to ``convergent_op()`` is outside the loop. This causes a mismatch between the programmer's expectation and the compiled program. The call should be executed convergently on every iteration of the loop, by threads that together take the branch to exit the loop. But when compiled, all threads that take the divergent exit on different iterations first converge at the beginning -of basic block C and then together execute the call to convergent_op(). +of basic block C and then together execute the call to ``convergent_op()``. In this case, :ref:`llvm.experimental.convergence.loop ` can be used to express the desired @@ -588,18 +588,18 @@ indirectly. 
token @llvm.experimental.convergence.entry() convergent readnone -This intrinsic is used to tie the dynamic instances inside of a function to +This intrinsic is used to tie the dynamic instances inside a function to those in the caller. 1. If the function is called from outside the scope of LLVM, the convergence of - dynamic instances of this intrinsic are environment-defined. For example: + dynamic instances of this intrinsic is environment-defined. For example: a. In an OpenCL *kernel launch*, the maximal set of threads that can communicate outside the memory model is a *workgroup*. Hence, a suitable choice is to specify that all the threads from a single workgroup in OpenCL execute converged dynamic instances of this intrinsic. - b. In a C/C++ program, threads are launched independently and they can + b. In a C/C++ program, threads are launched independently and can communicate only through the memory model. Hence the dynamic instances of this intrinsic in a C/C++ program are never converged. 2. If the function is called from a call-site in LLVM IR, then two @@ -701,7 +701,7 @@ convergent operation in the same basic block. token @llvm.experimental.convergence.anchor() convergent readnone -This intrinsic produces an initial convergence token that is independent from +This intrinsic produces an initial convergence token that is independent of any "outer scope". The set of threads executing converged dynamic instances of this intrinsic is implementation-defined. @@ -1483,7 +1483,7 @@ There is no guarantee about the value of ``%id`` in the threads where hoisting ``@subgroupShuffle`` might introduce UB. On the other hand, if ``@subgroupShuffle`` is defined such that it merely -produces an undefined value or poison as result when ``%id`` is "out of range", +produces an undefined value or poison as a result when ``%id`` is "out of range", then speculating is okay. 
Even though @@ -1502,7 +1502,7 @@ Assuming that ``%tok`` is only used inside the conditional block, the anchor can be sunk. The rationale is two-fold. First, the anchor has implementation-defined behavior, and the sinking is part of the implementation. Second, already in the original program, the set of threads that communicates in the -``@convergent.operation`` is automatically subset to the threads for which +``@convergent.operation`` is automatically a subset of the threads for which ``condition`` is true. Anchors can be hoisted in acyclic control flow. For example: From b0cb4e17e6ee362bbd8311adf2da7f3acb625fee Mon Sep 17 00:00:00 2001 From: dyung Date: Fri, 12 Sep 2025 15:52:50 +0100 Subject: [PATCH 137/734] Fix test on Windows by telling diff to ignore Windows-specific line endings. (#158297) Should fix bot: https://lab.llvm.org/buildbot/#/builders/46/builds/23206 --- llvm/utils/lit/tests/diff-test-update.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/utils/lit/tests/diff-test-update.py b/llvm/utils/lit/tests/diff-test-update.py index ad14034a85a17..8b9f4610f7f95 100644 --- a/llvm/utils/lit/tests/diff-test-update.py +++ b/llvm/utils/lit/tests/diff-test-update.py @@ -8,13 +8,13 @@ # RUN: not %{lit} --update-tests -v %S/Inputs/diff-test-update | FileCheck %s -# RUN: diff %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file.test -# RUN: diff %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file-populated.test -# RUN: diff %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file.test -# RUN: diff %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file-populated.test -# RUN: diff %S/Inputs/diff-test-update/single-split-file-no-expected.out %S/Inputs/diff-test-update/single-split-file-no-expected.test -# RUN: diff %S/Inputs/diff-test-update/split-c-comments.out 
%S/Inputs/diff-test-update/split-c-comments.test -# RUN: diff %S/Inputs/diff-test-update/split-whitespace.out "%S/Inputs/diff-test-update/split whitespace.test" +# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file.test +# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/single-split-file.out %S/Inputs/diff-test-update/single-split-file-populated.test +# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file.test +# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/multiple-split-file.out %S/Inputs/diff-test-update/multiple-split-file-populated.test +# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/single-split-file-no-expected.out %S/Inputs/diff-test-update/single-split-file-no-expected.test +# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/split-c-comments.out %S/Inputs/diff-test-update/split-c-comments.test +# RUN: diff --strip-trailing-cr %S/Inputs/diff-test-update/split-whitespace.out "%S/Inputs/diff-test-update/split whitespace.test" # CHECK: # update-diff-test: could not deduce source and target from {{.*}}1.in and {{.*}}2.in From b2521ae01c3ae777c088960e0edbc4cf417f6dbb Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 12 Sep 2025 15:54:16 +0100 Subject: [PATCH 138/734] [mlir][IR] Fix build error with gcc-7 (#158305) Fix build after #156825. 
--- mlir/lib/IR/Remarks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/IR/Remarks.cpp b/mlir/lib/IR/Remarks.cpp index 29088bd360e23..a55f61aff77bb 100644 --- a/mlir/lib/IR/Remarks.cpp +++ b/mlir/lib/IR/Remarks.cpp @@ -284,7 +284,7 @@ buildFilter(const mlir::remark::RemarkCategories &cats, if (!rx.isValid(err)) return std::nullopt; - return rx; + return std::make_optional(std::move(rx)); } RemarkEngine::RemarkEngine(bool printAsEmitRemarks, From af53104fa4cc104b678b8cd204fcffc2680871c8 Mon Sep 17 00:00:00 2001 From: Karlo Basioli Date: Fri, 12 Sep 2025 15:54:31 +0100 Subject: [PATCH 139/734] Fix bazel build issue - from #158298 (#158307) --- utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index e17cdb28286a2..469fcee8d9748 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -535,6 +535,7 @@ cc_library( "//mlir:MemRefToSPIRV", "//mlir:MemRefTransforms", "//mlir:Pass", + "//mlir:ReconcileUnrealizedCasts", "//mlir:Rewrite", "//mlir:SCFToSPIRV", "//mlir:SPIRVConversion", From 04320c0d24350d2e76e2ea4c94f05ca2bad9c736 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 12 Sep 2025 15:57:11 +0100 Subject: [PATCH 140/734] [mlir][Transforms][NFC] Remove `reconcileUnrealizedCasts` forward-declaration (#158291) This is a follow-up to https://github.com/llvm/llvm-project/pull/158067/files#r2343711946. 
--- .../Transforms/Utils/DialectConversion.cpp | 295 +++++++++--------- 1 file changed, 145 insertions(+), 150 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index d53e1e78f2027..f7565cfb0e45e 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -3097,6 +3097,151 @@ unsigned OperationLegalizer::applyCostModelToPatterns( return minDepth; } +//===----------------------------------------------------------------------===// +// Reconcile Unrealized Casts +//===----------------------------------------------------------------------===// + +/// Try to reconcile all given UnrealizedConversionCastOps and store the +/// left-over ops in `remainingCastOps` (if provided). See documentation in +/// DialectConversion.h for more details. +/// The `isCastOpOfInterestFn` is used to filter the cast ops to proceed: the +/// algorithm may visit an operand (or user) which is a cast op, but will not +/// try to reconcile it if not in the filtered set. +template +static void reconcileUnrealizedCastsImpl( + RangeT castOps, + function_ref isCastOpOfInterestFn, + SmallVectorImpl *remainingCastOps) { + // A worklist of cast ops to process. + SetVector worklist(llvm::from_range, castOps); + + // Helper function that return the unrealized_conversion_cast op that + // defines all inputs of the given op (in the same order). Return "nullptr" + // if there is no such op. + auto getInputCast = + [](UnrealizedConversionCastOp castOp) -> UnrealizedConversionCastOp { + if (castOp.getInputs().empty()) + return {}; + auto inputCastOp = + castOp.getInputs().front().getDefiningOp(); + if (!inputCastOp) + return {}; + if (inputCastOp.getOutputs() != castOp.getInputs()) + return {}; + return inputCastOp; + }; + + // Process ops in the worklist bottom-to-top. 
+ while (!worklist.empty()) { + UnrealizedConversionCastOp castOp = worklist.pop_back_val(); + + // Traverse the chain of input cast ops to see if an op with the same + // input types can be found. + UnrealizedConversionCastOp nextCast = castOp; + while (nextCast) { + if (nextCast.getInputs().getTypes() == castOp.getResultTypes()) { + if (llvm::any_of(nextCast.getInputs(), [&](Value v) { + return v.getDefiningOp() == castOp; + })) { + // Ran into a cycle. + break; + } + + // Found a cast where the input types match the output types of the + // matched op. We can directly use those inputs. + castOp.replaceAllUsesWith(nextCast.getInputs()); + break; + } + nextCast = getInputCast(nextCast); + } + } + + // A set of all alive cast ops. I.e., ops whose results are (transitively) + // used by an op that is not a cast op. + DenseSet liveOps; + + // Helper function that marks the given op and transitively reachable input + // cast ops as alive. + auto markOpLive = [&](Operation *rootOp) { + SmallVector worklist; + worklist.push_back(rootOp); + while (!worklist.empty()) { + Operation *op = worklist.pop_back_val(); + if (liveOps.insert(op).second) { + // Successfully inserted: process reachable input cast ops. + for (Value v : op->getOperands()) + if (auto castOp = v.getDefiningOp()) + if (isCastOpOfInterestFn(castOp)) + worklist.push_back(castOp); + } + } + }; + + // Find all alive cast ops. + for (UnrealizedConversionCastOp op : castOps) { + // The op may have been marked live already as being an operand of another + // live cast op. + if (liveOps.contains(op.getOperation())) + continue; + // If any of the users is not a cast op, mark the current op (and its + // input ops) as live. + if (llvm::any_of(op->getUsers(), [&](Operation *user) { + auto castOp = dyn_cast(user); + return !castOp || !isCastOpOfInterestFn(castOp); + })) + markOpLive(op); + } + + // Erase all dead cast ops. 
+ for (UnrealizedConversionCastOp op : castOps) { + if (liveOps.contains(op)) { + // Op is alive and was not erased. Add it to the remaining cast ops. + if (remainingCastOps) + remainingCastOps->push_back(op); + continue; + } + + // Op is dead. Erase it. + op->dropAllUses(); + op->erase(); + } +} + +void mlir::reconcileUnrealizedCasts( + ArrayRef castOps, + SmallVectorImpl *remainingCastOps) { + // Set of all cast ops for faster lookups. + DenseSet castOpSet; + for (UnrealizedConversionCastOp op : castOps) + castOpSet.insert(op); + reconcileUnrealizedCasts(castOpSet, remainingCastOps); +} + +void mlir::reconcileUnrealizedCasts( + const DenseSet &castOps, + SmallVectorImpl *remainingCastOps) { + reconcileUnrealizedCastsImpl( + llvm::make_range(castOps.begin(), castOps.end()), + [&](UnrealizedConversionCastOp castOp) { + return castOps.contains(castOp); + }, + remainingCastOps); +} + +namespace mlir { +static void reconcileUnrealizedCasts( + const DenseMap + &castOps, + SmallVectorImpl *remainingCastOps) { + reconcileUnrealizedCastsImpl( + castOps.keys(), + [&](UnrealizedConversionCastOp castOp) { + return castOps.contains(castOp); + }, + remainingCastOps); +} +} // namespace mlir + //===----------------------------------------------------------------------===// // OperationConverter //===----------------------------------------------------------------------===// @@ -3118,13 +3263,6 @@ enum OpConversionMode { } // namespace namespace mlir { - -// Predeclaration only. -static void reconcileUnrealizedCasts( - const DenseMap - &castOps, - SmallVectorImpl *remainingCastOps); - // This class converts operations to a given conversion target via a set of // rewrite patterns. The conversion behaves differently depending on the // conversion mode. 
@@ -3302,149 +3440,6 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { return success(); } -//===----------------------------------------------------------------------===// -// Reconcile Unrealized Casts -//===----------------------------------------------------------------------===// - -/// Try to reconcile all given UnrealizedConversionCastOps and store the -/// left-over ops in `remainingCastOps` (if provided). See documentation in -/// DialectConversion.h for more details. -/// The `isCastOpOfInterestFn` is used to filter the cast ops to proceed: the -/// algorithm may visit an operand (or user) which is a cast op, but will not -/// try to reconcile it if not in the filtered set. -template -static void reconcileUnrealizedCastsImpl( - RangeT castOps, - function_ref isCastOpOfInterestFn, - SmallVectorImpl *remainingCastOps) { - // A worklist of cast ops to process. - SetVector worklist(llvm::from_range, castOps); - - // Helper function that return the unrealized_conversion_cast op that - // defines all inputs of the given op (in the same order). Return "nullptr" - // if there is no such op. - auto getInputCast = - [](UnrealizedConversionCastOp castOp) -> UnrealizedConversionCastOp { - if (castOp.getInputs().empty()) - return {}; - auto inputCastOp = - castOp.getInputs().front().getDefiningOp(); - if (!inputCastOp) - return {}; - if (inputCastOp.getOutputs() != castOp.getInputs()) - return {}; - return inputCastOp; - }; - - // Process ops in the worklist bottom-to-top. - while (!worklist.empty()) { - UnrealizedConversionCastOp castOp = worklist.pop_back_val(); - - // Traverse the chain of input cast ops to see if an op with the same - // input types can be found. - UnrealizedConversionCastOp nextCast = castOp; - while (nextCast) { - if (nextCast.getInputs().getTypes() == castOp.getResultTypes()) { - if (llvm::any_of(nextCast.getInputs(), [&](Value v) { - return v.getDefiningOp() == castOp; - })) { - // Ran into a cycle. 
- break; - } - - // Found a cast where the input types match the output types of the - // matched op. We can directly use those inputs. - castOp.replaceAllUsesWith(nextCast.getInputs()); - break; - } - nextCast = getInputCast(nextCast); - } - } - - // A set of all alive cast ops. I.e., ops whose results are (transitively) - // used by an op that is not a cast op. - DenseSet liveOps; - - // Helper function that marks the given op and transitively reachable input - // cast ops as alive. - auto markOpLive = [&](Operation *rootOp) { - SmallVector worklist; - worklist.push_back(rootOp); - while (!worklist.empty()) { - Operation *op = worklist.pop_back_val(); - if (liveOps.insert(op).second) { - // Successfully inserted: process reachable input cast ops. - for (Value v : op->getOperands()) - if (auto castOp = v.getDefiningOp()) - if (isCastOpOfInterestFn(castOp)) - worklist.push_back(castOp); - } - } - }; - - // Find all alive cast ops. - for (UnrealizedConversionCastOp op : castOps) { - // The op may have been marked live already as being an operand of another - // live cast op. - if (liveOps.contains(op.getOperation())) - continue; - // If any of the users is not a cast op, mark the current op (and its - // input ops) as live. - if (llvm::any_of(op->getUsers(), [&](Operation *user) { - auto castOp = dyn_cast(user); - return !castOp || !isCastOpOfInterestFn(castOp); - })) - markOpLive(op); - } - - // Erase all dead cast ops. - for (UnrealizedConversionCastOp op : castOps) { - if (liveOps.contains(op)) { - // Op is alive and was not erased. Add it to the remaining cast ops. - if (remainingCastOps) - remainingCastOps->push_back(op); - continue; - } - - // Op is dead. Erase it. - op->dropAllUses(); - op->erase(); - } -} - -void mlir::reconcileUnrealizedCasts( - ArrayRef castOps, - SmallVectorImpl *remainingCastOps) { - // Set of all cast ops for faster lookups. 
- DenseSet castOpSet; - for (UnrealizedConversionCastOp op : castOps) - castOpSet.insert(op); - reconcileUnrealizedCasts(castOpSet, remainingCastOps); -} - -void mlir::reconcileUnrealizedCasts( - const DenseSet &castOps, - SmallVectorImpl *remainingCastOps) { - reconcileUnrealizedCastsImpl( - llvm::make_range(castOps.begin(), castOps.end()), - [&](UnrealizedConversionCastOp castOp) { - return castOps.contains(castOp); - }, - remainingCastOps); -} - -static void mlir::reconcileUnrealizedCasts( - const DenseMap - &castOps, - SmallVectorImpl *remainingCastOps) { - reconcileUnrealizedCastsImpl( - castOps.keys(), - [&](UnrealizedConversionCastOp castOp) { - return castOps.contains(castOp); - }, - remainingCastOps); -} - //===----------------------------------------------------------------------===// // Type Conversion //===----------------------------------------------------------------------===// From 6c11130bcd4cb636e13d55d2df7f6072069bbe07 Mon Sep 17 00:00:00 2001 From: Karlo Basioli Date: Fri, 12 Sep 2025 16:10:15 +0100 Subject: [PATCH 141/734] Fix bazel build issue - caused in #156825 (#158313) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index e556d65dba002..5042198d78b74 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -9082,7 +9082,9 @@ cc_library( ":Parser", ":Pass", ":PluginsLib", + ":RemarkStreamer", ":Support", + "//llvm:Remarks", "//llvm:Support", ], ) From 149f91bad66972ad8bf0add5c79bf74055f6905a Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Fri, 12 Sep 2025 17:20:16 +0200 Subject: [PATCH 142/734] [compiler-rt][AArch64] Don't use x18 in __arm_sme_save (#157802) The AAPCS recommends avoiding the use of x18 as it may be used for other purposes such as a shadow call stack. 
In this particular case it could just as well use x16 instead. --- compiler-rt/lib/builtins/aarch64/sme-abi.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index d5510ac0cfa50..1713a5969459a 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -280,17 +280,17 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save) mov w16, #1 str x16, [x0] - add x18, x0, #32 + add x16, x0, #32 tbz x17, #FEAT_SME2_BIT, 1f // Store ZT0 - str zt0, [x18] - add x18, x18, #64 + str zt0, [x16] + add x16, x16, #64 1: - // Set up lazy-save (x18 = pointer to buffer) + // Set up lazy-save (x16 = pointer to buffer) rdsvl x17, #1 - str x18, [x0, #16]! + str x16, [x0, #16]! strh w17, [x0, #8] strh wzr, [x0, #10] str wzr, [x0, #12] From be587941c22f16df6fb2053cc06cf91c5a378613 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Fri, 12 Sep 2025 11:32:10 -0400 Subject: [PATCH 143/734] [mlir] Self-nominate for arith dialect maintenance (#157355) Following https://llvm.org/docs/DeveloperPolicy.html#maintainers, I'd like to self-nominate for arith dialect maintenance. As per the policy: > Maintainers are volunteering to take on the following shared responsibilities within an area of a project: > ... I believe I've been already performing most of the maintenance duties over the past few years, including direct code contributions, code reviews, and both starting and participating in relevant RFCs on discourse. 
You can look those up with: * `git log --author=Jakub --oneline -- 'mlir/include/mlir/Dialect/Arith*' 'mlir/lib/Dialect/Arith*'` * https://github.com/llvm/llvm-project/pulls?q=is%3Apr+label%3Amlir%3Aarith+reviewed-by%3Akuhar * Some notable RFCs authored: https://discourse.llvm.org/t/rfc-define-precise-arith-semantics/65507, https://discourse.llvm.org/t/rfc-poison-semantics-for-mlir/66245, https://discourse.llvm.org/t/rfc-arith-add-extended-multiplication-ops/66869, https://discourse.llvm.org/t/rfc-add-integer-add-with-carry-op-to-arith/64573, https://discourse.llvm.org/t/rfc-arith-should-we-support-scalar-vector-arith-bitcast-s/65427. In addition to the `core` category maintainers, I can bring additional perspective as I care both about conversion to llvm (as a user) and to spirv (as a maintainer). --- mlir/Maintainers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/Maintainers.md b/mlir/Maintainers.md index 02e93eb658279..5d3b576c2e751 100644 --- a/mlir/Maintainers.md +++ b/mlir/Maintainers.md @@ -46,7 +46,7 @@ dialects, build system and language bindings. 
* ‘ptr’ Dialect ([fabianmcg](https://github.com/fabianmcg)) #### Basic Compute Dialects -* ‘arith’ Dialect (core) +* ‘arith’ Dialect (core + [kuhar](https://github.com/kuhar)) * ‘math’ Dialect (core) * Rewrite System Dialects (core) * Transform Dialect ([martin-luecke](https://github.com/martin-luecke), [ftynse](https://github.com/ftynse), [rolfmorel](https://github.com/rolfmorel)) From 7ebfcbd0ec525810d3874b5826ac1cb53f14c6e4 Mon Sep 17 00:00:00 2001 From: Jeaye Wilkerson Date: Fri, 12 Sep 2025 08:34:14 -0700 Subject: [PATCH 144/734] Allow for custom code model in clang::Interpreter (#156977) This is necessary when using ASan, since the larger code size will lead to errors such as: ``` JIT session error: In graph clojure_core-clojure.core$clojure_core_cpp_cast_24538-24543-jitted-objectbuffer, section .eh_frame: relocation target 0x7bffe374b000 (DW.ref.__gxx_personality_v0) is out of range of Delta32 fixup at address 0x7bffe374b000 ( @ 0x7fffebf48158 + 0x13) ``` Previously, `clang::Interpreter` would hard-code the usage of a small code model. With this change, we default to small, but allow for custom values. This related to #102858 and #135401. There is no change to default behavior here. @lhames for review. --- clang/include/clang/Interpreter/Interpreter.h | 4 +++- clang/lib/Interpreter/Interpreter.cpp | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index 61af7bf762d5e..fcc270a17001e 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -135,11 +135,13 @@ class Interpreter { std::string OrcRuntimePath = ""; /// PID of the out-of-process JIT executor. 
uint32_t ExecutorPID = 0; + /// An optional code model to provide to the JITTargetMachineBuilder + std::optional CM = std::nullopt; JITConfig() : IsOutOfProcess(false), OOPExecutor(""), OOPExecutorConnect(""), UseSharedMemory(false), SlabAllocateSize(0), OrcRuntimePath(""), - ExecutorPID(0) {} + ExecutorPID(0), CM(std::nullopt) {} }; protected: diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 043e0c1e5754e..84f1c363b5f6f 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -647,6 +647,8 @@ llvm::Error Interpreter::CreateExecutor(JITConfig Config) { auto JTMB = createJITTargetMachineBuilder(TT); if (!JTMB) return JTMB.takeError(); + if (Config.CM) + JTMB->setCodeModel(Config.CM); auto JB = IncrementalExecutor::createDefaultJITBuilder(std::move(*JTMB)); if (!JB) return JB.takeError(); From ead4f3e271fdf6918aef2ede3a7134811147d276 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Fri, 12 Sep 2025 16:35:58 +0100 Subject: [PATCH 145/734] [InstCombine] Canonicalize active lane mask params (#158065) Rewrite active lane mask intrinsics to begin their range from 0 when both parameters are constant integers. 
--- .../InstCombine/InstCombineCalls.cpp | 13 +++++++ .../InstCombine/get_active_lane_mask.ll | 38 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/get_active_lane_mask.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 11bac7bdb6eb2..17cf4154f8dbd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3952,6 +3952,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } break; } + case Intrinsic::get_active_lane_mask: { + const APInt *Op0, *Op1; + if (match(II->getOperand(0), m_StrictlyPositive(Op0)) && + match(II->getOperand(1), m_APInt(Op1))) { + Type *OpTy = II->getOperand(0)->getType(); + return replaceInstUsesWith( + *II, Builder.CreateIntrinsic( + II->getType(), Intrinsic::get_active_lane_mask, + {Constant::getNullValue(OpTy), + ConstantInt::get(OpTy, Op1->usub_sat(*Op0))})); + } + break; + } default: { // Handle target specific intrinsics std::optional V = targetInstCombineIntrinsic(*II); diff --git a/llvm/test/Transforms/InstCombine/get_active_lane_mask.ll b/llvm/test/Transforms/InstCombine/get_active_lane_mask.ll new file mode 100644 index 0000000000000..c642904cc275b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/get_active_lane_mask.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define @rewrite_range_nxv4i1() { +; CHECK-LABEL: define @rewrite_range_nxv4i1() { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 3) +; CHECK-NEXT: ret [[MASK]] +; + %mask = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4) + ret %mask +} + +define @rewrite_range_nxv16i1() { +; CHECK-LABEL: define @rewrite_range_nxv16i1() { +; CHECK-NEXT: [[MASK:%.*]] = call 
@llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 7) +; CHECK-NEXT: ret [[MASK]] +; + %mask = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 123123, i64 123130) + ret %mask +} + +define @rewrite_range_nxv16i1_i128() { +; CHECK-LABEL: define @rewrite_range_nxv16i1_i128() { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i128(i128 0, i128 10) +; CHECK-NEXT: ret [[MASK]] +; + %mask = call @llvm.get.active.lane.mask.nxv16i1.i128(i128 18446744073709551616, i128 18446744073709551626) + ret %mask +} + +define @bail_lhs_is_zero() { +; CHECK-LABEL: define @bail_lhs_is_zero() { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) +; CHECK-NEXT: ret [[MASK]] +; + %mask = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) + ret %mask +} From ed1f1b88e49b244658aebc8a48dc8cd458363e70 Mon Sep 17 00:00:00 2001 From: Alex Trotta <44127594+Ahajha@users.noreply.github.com> Date: Fri, 12 Sep 2025 11:53:34 -0400 Subject: [PATCH 146/734] Revert "[bazel][mlir][python] Port #155741: stub auto-generation (#157173)" (#157995) This reverts commit 46d8fdd86ec79ba241b0db6c7fedc835902bc960. The whole set of commits got reverted in https://github.com/llvm/llvm-project/pull/157831, reverting this one too. 
--- utils/bazel/WORKSPACE | 6 ++-- .../mlir/python/BUILD.bazel | 36 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/utils/bazel/WORKSPACE b/utils/bazel/WORKSPACE index 20c0fd1f4c985..da69e1d7cf5a7 100644 --- a/utils/bazel/WORKSPACE +++ b/utils/bazel/WORKSPACE @@ -186,9 +186,9 @@ maybe( http_archive, name = "nanobind", build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD", - sha256 = "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90", - strip_prefix = "nanobind-2.9.2", - url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz", + sha256 = "bb35deaed7efac5029ed1e33880a415638352f757d49207a8e6013fefb6c49a7", + strip_prefix = "nanobind-2.4.0", + url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.4.0.tar.gz", ) load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index 944a911bccc17..016794d30b349 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -32,6 +32,13 @@ filegroup( ], ) +filegroup( + name = "ExecutionEnginePyIFiles", + srcs = [ + "mlir/_mlir_libs/_mlirExecutionEngine.pyi", + ], +) + filegroup( name = "IRPyFiles", srcs = [ @@ -46,6 +53,14 @@ filegroup( ]), ) +filegroup( + name = "IRPyIFiles", + srcs = [ + "mlir/_mlir_libs/_mlir/__init__.pyi", + "mlir/_mlir_libs/_mlir/ir.pyi", + ], +) + filegroup( name = "MlirLibsPyFiles", srcs = [ @@ -60,6 +75,13 @@ filegroup( ], ) +filegroup( + name = "PassManagerPyIFiles", + srcs = [ + "mlir/_mlir_libs/_mlir/passmanager.pyi", + ], +) + filegroup( name = "RewritePyFiles", srcs = [ @@ -637,6 +659,13 @@ gentbl_filegroup( ], ) +filegroup( + name = "PDLPyIFiles", + srcs = [ + "mlir/_mlir_libs/_mlir/dialects/pdl.pyi", + ], +) + filegroup( name = "PDLPyFiles", srcs = [ @@ -727,6 +756,13 
@@ filegroup( # Quant dialect. ##---------------------------------------------------------------------------## +filegroup( + name = "QuantPyIFiles", + srcs = [ + "mlir/_mlir_libs/_mlir/dialects/quant.pyi", + ], +) + filegroup( name = "QuantPyFiles", srcs = [ From b8eaceb39b0b2e4f6493decbee2b66e239e720e9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 12 Sep 2025 17:06:26 +0100 Subject: [PATCH 147/734] [VPlan] Explicitly replicate VPInstructions by VF. (#155102) Extend replicateByVF added in #142433 (aa240293190) to also explicitly unroll replicating VPInstructions. Now the only remaining case where we replicate for all lanes is VPReplicateRecipes in replicate regions. PR: https://github.com/llvm/llvm-project/pull/155102 --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 30 ++------ llvm/lib/Transforms/Vectorize/VPlan.h | 21 +++--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 33 +++------ .../Transforms/Vectorize/VPlanTransforms.cpp | 33 +++++---- .../Transforms/Vectorize/VPlanTransforms.h | 8 +-- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 68 ++++++++++++------- .../LoopVectorize/pointer-induction.ll | 7 +- .../LoopVectorize/predicate-switch.ll | 26 ++++--- .../LoopVectorize/vplan-predicate-switch.ll | 12 ++-- 9 files changed, 119 insertions(+), 119 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 16b1b539345de..e3244623ee968 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -343,37 +343,21 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { LastLane = 0; } - auto *LastInst = cast(get(Def, LastLane)); + // We need to construct the vector value for a single-scalar value by + // broadcasting the scalar to all lanes. + // TODO: Replace by introducing Broadcast VPInstructions. 
+ assert(IsSingleScalar && "must be a single-scalar at this point"); // Set the insert point after the last scalarized instruction or after the // last PHI, if LastInst is a PHI. This ensures the insertelement sequence // will directly follow the scalar definitions. auto OldIP = Builder.saveIP(); + auto *LastInst = cast(get(Def, LastLane)); auto NewIP = isa(LastInst) ? LastInst->getParent()->getFirstNonPHIIt() : std::next(BasicBlock::iterator(LastInst)); Builder.SetInsertPoint(&*NewIP); - - // However, if we are vectorizing, we need to construct the vector values. - // If the value is known to be uniform after vectorization, we can just - // broadcast the scalar value corresponding to lane zero. Otherwise, we - // construct the vector values using insertelement instructions. Since the - // resulting vectors are stored in State, we will only generate the - // insertelements once. - Value *VectorValue = nullptr; - if (IsSingleScalar) { - VectorValue = GetBroadcastInstrs(ScalarValue); - set(Def, VectorValue); - } else { - assert(!VF.isScalable() && "VF is assumed to be non scalable."); - assert(isa(Def) && - "Explicit BuildVector recipes must have" - "handled packing for non-VPInstructions."); - // Initialize packing with insertelements to start from poison. 
- VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF)); - for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane) - VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane); - set(Def, VectorValue); - } + Value *VectorValue = GetBroadcastInstrs(ScalarValue); + set(Def, VectorValue); Builder.restoreIP(OldIP); return VectorValue; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 53291a931530f..997a45b1470ef 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -908,6 +908,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { return R && classof(R); } + virtual VPRecipeWithIRFlags *clone() override = 0; + static inline bool classof(const VPSingleDefRecipe *U) { auto *R = dyn_cast(U); return R && classof(R); @@ -1061,13 +1063,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, VScale, }; -private: - typedef unsigned char OpcodeTy; - OpcodeTy Opcode; - - /// An optional name that can be used for the generated IR instruction. - const std::string Name; - /// Returns true if this VPInstruction generates scalar values for all lanes. /// Most VPInstructions generate a single value per part, either vector or /// scalar. VPReplicateRecipe takes care of generating multiple (scalar) @@ -1076,6 +1071,13 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, /// underlying ingredient. bool doesGeneratePerAllLanes() const; +private: + typedef unsigned char OpcodeTy; + OpcodeTy Opcode; + + /// An optional name that can be used for the generated IR instruction. + const std::string Name; + /// Returns true if we can generate a scalar for the first lane only if /// needed. bool canGenerateScalarForFirstLane() const; @@ -1085,11 +1087,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, /// existing value is returned rather than a generated one. 
Value *generate(VPTransformState &State); - /// Utility methods serving execute(): generates a scalar single instance of - /// the modeled instruction for a given lane. \returns the scalar generated - /// value for lane \p Lane. - Value *generatePerLane(VPTransformState &State, const VPLane &Lane); - #if !defined(NDEBUG) /// Return the number of operands determined by the opcode of the /// VPInstruction. Returns -1u if the number of operands cannot be determined diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bf51489543098..11846f863a3fa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -564,16 +564,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { } } -Value *VPInstruction::generatePerLane(VPTransformState &State, - const VPLane &Lane) { - IRBuilderBase &Builder = State.Builder; - - assert(getOpcode() == VPInstruction::PtrAdd && - "only PtrAdd opcodes are supported for now"); - return Builder.CreatePtrAdd(State.get(getOperand(0), Lane), - State.get(getOperand(1), Lane), Name); -} - /// Create a conditional branch using \p Cond branching to the successors of \p /// VPBB. Note that the first successor is always forward (i.e. 
not created yet) /// while the second successor may already have been created (if it is a header @@ -1197,24 +1187,13 @@ void VPInstruction::execute(VPTransformState &State) { "Set flags not supported for the provided opcode"); if (hasFastMathFlags()) State.Builder.setFastMathFlags(getFastMathFlags()); - bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && - (vputils::onlyFirstLaneUsed(this) || - isVectorToScalar() || isSingleScalar()); - bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); - if (GeneratesPerAllLanes) { - for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue(); - Lane != NumLanes; ++Lane) { - Value *GeneratedValue = generatePerLane(State, VPLane(Lane)); - assert(GeneratedValue && "generatePerLane must produce a value"); - State.set(this, GeneratedValue, VPLane(Lane)); - } - return; - } - Value *GeneratedValue = generate(State); if (!hasResult()) return; assert(GeneratedValue && "generate must produce a value"); + bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && + (vputils::onlyFirstLaneUsed(this) || + isVectorToScalar() || isSingleScalar()); assert((((GeneratedValue->getType()->isVectorTy() || GeneratedValue->getType()->isStructTy()) == !GeneratesPerFirstLaneOnly) || @@ -1287,6 +1266,12 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::Broadcast: case VPInstruction::ReductionStartVector: return true; + case VPInstruction::BuildStructVector: + case VPInstruction::BuildVector: + // Before replicating by VF, Build(Struct)Vector uses all lanes of the + // operand, after replicating its operands only the first lane is used. + // Before replicating, it will have only a single operand. 
+ return getNumOperands() > 1; case VPInstruction::PtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); case VPInstruction::WidePtrAdd: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2cac5557daeee..fcd85ba9ab7f0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3695,34 +3695,39 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) { vp_depth_first_shallow(Plan.getEntry())); auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly( vp_depth_first_shallow(LoopRegion->getEntry())); - // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes, - // excluding ones in replicate regions. Those are not materialized explicitly - // yet. Those vector users are still handled in VPReplicateRegion::execute(), - // via shouldPack(). + // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and + // VPInstructions, excluding ones in replicate regions. Those are not + // materialized explicitly yet. Those vector users are still handled in + // VPReplicateRegion::execute(), via shouldPack(). // TODO: materialize build vectors for replicating recipes in replicating // regions. - // TODO: materialize build vectors for VPInstructions. 
for (VPBasicBlock *VPBB : concat(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - auto *RepR = dyn_cast(&R); - auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) { + if (!isa(&R)) + continue; + auto *DefR = cast(&R); + auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) { VPRegionBlock *ParentRegion = cast(U)->getParent()->getParent(); - return !U->usesScalars(RepR) || ParentRegion != LoopRegion; + return !U->usesScalars(DefR) || ParentRegion != LoopRegion; }; - if (!RepR || RepR->isSingleScalar() || - none_of(RepR->users(), UsesVectorOrInsideReplicateRegion)) + if ((isa(DefR) && + cast(DefR)->isSingleScalar()) || + (isa(DefR) && + (vputils::onlyFirstLaneUsed(DefR) || + !cast(DefR)->doesGeneratePerAllLanes())) || + none_of(DefR->users(), UsesVectorOrInsideReplicateRegion)) continue; - Type *ScalarTy = TypeInfo.inferScalarType(RepR); + Type *ScalarTy = TypeInfo.inferScalarType(DefR); unsigned Opcode = ScalarTy->isStructTy() ? VPInstruction::BuildStructVector : VPInstruction::BuildVector; - auto *BuildVector = new VPInstruction(Opcode, {RepR}); - BuildVector->insertAfter(RepR); + auto *BuildVector = new VPInstruction(Opcode, {DefR}); + BuildVector->insertAfter(DefR); - RepR->replaceUsesWithIf( + DefR->replaceUsesWithIf( BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion]( VPUser &U, unsigned) { return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 1957428fab799..69452a7e37572 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -158,10 +158,10 @@ struct VPlanTransforms { /// Explicitly unroll \p Plan by \p UF. 
static void unrollByUF(VPlan &Plan, unsigned UF); - /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan - /// with \p VF single-scalar recipes. - /// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby - /// dissolving the latter. + /// Replace each replicating VPReplicateRecipe and VPInstruction outside of + /// any replicate region in \p Plan with \p VF single-scalar recipes. + /// TODO: Also replicate VPScalarIVSteps and VPReplicateRecipes inside + /// replicate regions, thereby dissolving the latter. static void replicateByVF(VPlan &Plan, ElementCount VF); /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 443df167378b0..ce5949485e63d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -463,15 +463,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) { VPlanTransforms::removeDeadRecipes(Plan); } -/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p -/// Def2LaneDefs to look up scalar definitions for operands of \RepR. -static VPReplicateRecipe * +/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or +/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar +/// definitions for operands of \DefR. +static VPRecipeWithIRFlags * cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, - VPReplicateRecipe *RepR, VPLane Lane, + VPRecipeWithIRFlags *DefR, VPLane Lane, const DenseMap> &Def2LaneDefs) { // Collect the operands at Lane, creating extracts as needed. SmallVector NewOps; - for (VPValue *Op : RepR->operands()) { + for (VPValue *Op : DefR->operands()) { // If Op is a definition that has been unrolled, directly use the clone for // the corresponding lane. 
auto LaneDefs = Def2LaneDefs.find(Op); @@ -501,11 +502,24 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, NewOps.push_back(Ext); } - auto *New = - new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, - /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); - New->transferFlags(*RepR); - New->insertBefore(RepR); + VPRecipeWithIRFlags *New; + if (auto *RepR = dyn_cast(DefR)) { + // TODO: have cloning of replicate recipes also provide the desired result + // coupled with setting its operands to NewOps (deriving IsSingleScalar and + // Mask from the operands?) + New = + new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, + /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); + } else { + assert(isa(DefR) && + "DefR must be a VPReplicateRecipe or VPInstruction"); + New = DefR->clone(); + for (const auto &[Idx, Op] : enumerate(NewOps)) { + New->setOperand(Idx, Op); + } + } + New->transferFlags(*DefR); + New->insertBefore(DefR); return New; } @@ -530,34 +544,38 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { SmallVector ToRemove; for (VPBasicBlock *VPBB : VPBBsToUnroll) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - auto *RepR = dyn_cast(&R); - if (!RepR || RepR->isSingleScalar()) + if (!isa(&R) || + (isa(&R) && + cast(&R)->isSingleScalar()) || + (isa(&R) && + !cast(&R)->doesGeneratePerAllLanes())) continue; - VPBuilder Builder(RepR); - if (RepR->getNumUsers() == 0) { - // Create single-scalar version of RepR for all lanes. + auto *DefR = cast(&R); + VPBuilder Builder(DefR); + if (DefR->getNumUsers() == 0) { + // Create single-scalar version of DefR for all lanes. for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) - cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs); - RepR->eraseFromParent(); + cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs); + DefR->eraseFromParent(); continue; } - /// Create single-scalar version of RepR for all lanes. 
+ /// Create single-scalar version of DefR for all lanes. SmallVector LaneDefs; for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) LaneDefs.push_back( - cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs)); + cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs)); - Def2LaneDefs[RepR] = LaneDefs; + Def2LaneDefs[DefR] = LaneDefs; /// Users that only demand the first lane can use the definition for lane /// 0. - RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) { - return U.onlyFirstLaneUsed(RepR); + DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) { + return U.onlyFirstLaneUsed(DefR); }); - // Update each build vector user that currently has RepR as its only + // Update each build vector user that currently has DefR as its only // operand, to have all LaneDefs as its operands. - for (VPUser *U : to_vector(RepR->users())) { + for (VPUser *U : to_vector(DefR->users())) { auto *VPI = dyn_cast(U); if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector && VPI->getOpcode() != VPInstruction::BuildStructVector)) @@ -569,7 +587,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { for (VPValue *LaneDef : drop_begin(LaneDefs)) VPI->addOperand(LaneDef); } - ToRemove.push_back(RepR); + ToRemove.push_back(DefR); } } for (auto *R : reverse(ToRemove)) diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index d2c53f47a6670..a633dfee066ed 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -33,6 +33,10 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 
0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3 @@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) { ; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]] ; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]] ; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]] -; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] -; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]] -; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] ; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]] ; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 97f4542bfe67a..87447b63f4383 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -22,6 +22,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] ; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] +; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 +; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr 
[[NEXT_GEP3]], i32 1 ; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) ; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13) @@ -117,8 +119,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 ; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] ; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] +; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 +; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1 ; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] ; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] +; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0 +; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1 ; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1 @@ -338,21 +344,21 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: [[ENTRY:.*]]: ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC1-NEXT: i64 120, label %[[IF_THEN]] +; IC1-NEXT: i64 120, label %[[IF_THEN1]] ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC1-NEXT: ] -; IC1: [[IF_THEN]]: +; IC1: [[IF_THEN1]]: ; IC1-NEXT: br label %[[LOOP_HEADER]] -; IC1: [[IF_THEN1:.*:]] +; IC1: [[IF_THEN:.*:]] ; IC1-NEXT: [[GEP:%.*]] = 
getelementptr inbounds i64, ptr [[START]], i64 poison ; IC1-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC1-NEXT: unreachable ; IC1: [[LOOP_LATCH]]: ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -361,21 +367,21 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: [[ENTRY:.*]]: ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC2-NEXT: i64 120, label %[[IF_THEN]] +; IC2-NEXT: i64 120, label %[[IF_THEN1]] ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC2-NEXT: ] -; IC2: [[IF_THEN]]: +; IC2: [[IF_THEN1]]: ; IC2-NEXT: br label %[[LOOP_HEADER]] -; IC2: [[IF_THEN1:.*:]] +; IC2: [[IF_THEN:.*:]] ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC2-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC2-NEXT: unreachable ; IC2: [[LOOP_LATCH]]: ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index 3d05ee7f27b5c..cf85f26992c2f 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -22,7 +22,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], 
[ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ] ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2> -; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]> +; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]> +; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]> +; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]> ; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12> ; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13> @@ -36,7 +40,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]> +; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: @@ -53,7 +57,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]> +; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: @@ -70,7 +74,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]> +; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: From 2491dc3d6fa6f5e7474fbdac28a8eefdeba52d49 Mon Sep 17 00:00:00 
2001 From: Kazu Hirata Date: Fri, 12 Sep 2025 09:13:16 -0700 Subject: [PATCH 148/734] [Utils] Fix a warning This patch fixes: llvm/lib/Transforms/Utils/SimplifyCFG.cpp:338:6: error: unused function 'isSelectInRoleOfConjunctionOrDisjunction' [-Werror,-Wunused-function] --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index e5517409ded70..5a842f9b49c1b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -335,7 +335,8 @@ class SimplifyCFGOpt { // we synthesize a || b as select a, true, b // we synthesize a && b as select a, b, false // this function determines if SI is playing one of those roles. -bool isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) { +[[maybe_unused]] bool +isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) { return ((isa(SI->getTrueValue()) && (dyn_cast(SI->getTrueValue())->isOne())) || (isa(SI->getFalseValue()) && From c45aa5c764ffcd1f0a4ce9f006f266d664ea6f19 Mon Sep 17 00:00:00 2001 From: Vedant Paranjape Date: Fri, 12 Sep 2025 12:16:31 -0400 Subject: [PATCH 149/734] [InstCombine] Revert FSub optimization from #157757 (#158315) Since FSub X, 0 gets canonicalised to FAdd X, -0 the said optimization didn't make much sense for FSub. Remove it from IC and the adjoined testcase.
--- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 10 ---------- .../InstCombine/fold-fadd-with-zero-gh154238.ll | 11 ----------- 2 files changed, 21 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 726d09aa26941..00951fde0cf8a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -3155,16 +3155,6 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { Value *X, *Y; Constant *C; - // B = fsub A, 0.0 - // Z = Op B - // can be transformed into - // Z = Op A - // Where Op is such that we can ignore sign of 0 in fsub - Value *A; - if (match(&I, m_OneUse(m_FSub(m_Value(A), m_AnyZeroFP()))) && - canIgnoreSignBitOfZero(*I.use_begin())) - return replaceInstUsesWith(I, A); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X) // Canonicalize to fadd to make analysis easier. 
diff --git a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll index b9d951dc2945a..f9f0ca8a08bcb 100644 --- a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll +++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll @@ -24,14 +24,3 @@ define float @src2(float %arg1) { %v4 = fsub float %v2, %v3 ret float %v4 } - -define float @src_sub(float %arg1) { -; CHECK-LABEL: define float @src_sub( -; CHECK-SAME: float [[ARG1:%.*]]) { -; CHECK-NEXT: [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]]) -; CHECK-NEXT: ret float [[V3]] -; - %v2 = fsub float %arg1, 0.000000e+00 - %v3 = call float @llvm.fabs.f32(float %v2) - ret float %v3 -} From 5d088ba30440d37f180f6b2e2f2fcc25d5c77018 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Fri, 12 Sep 2025 09:17:48 -0700 Subject: [PATCH 150/734] [lldb] Track CFA pointer metadata in StackID (#157498) [lldb] Track CFA pointer metadata in StackID In this commit: 9c8e71644227 [lldb] Make StackID call Fix{Code,Data} pointers (#152796) We made StackID keep track of the CFA without any pointer metadata in it. This is necessary when comparing two StackIDs to determine which one is "younger". However, the CFA inside StackIDs is also used in other contexts through the method StackID::GetCallFrameAddress. One notable case is DWARFExpression: the computation of `DW_OP_call_frame_address` is done using StackID. This feeds into many other places, e.g. expression evaluation may require the address of a variable that is computed from the CFA; to access the variable without faulting, we may need to preserve the pointer metadata. As such, StackID must be able to provide both versions of the CFA. In the spirit of allowing consumers of pointers to decide what to do with pointer metadata, this patch changes StackID to store both versions of the cfa pointer. 
Two getter methods are provided, and all call sites except DWARFExpression preserve their existing behavior (stripped pointer). Other alternatives were considered: * Just store the raw pointer. This would require changing the comparison operator `<` to also receive a Process, as the comparison requires stripped pointers. It wasn't clear if all call-sites had a non-null process, whereas we know we have a process when creating a StackID. * Store a weak pointer to the process inside the class, and then strip metadata as needed. This would require a `weak_ptr::lock` in many operations of LLDB, and it felt wasteful. It also prevents stripping of the pointer if the process has gone away. This patch also changes RegisterContextUnwind::ReadFrameAddress, which is the method computing the CFA fed into StackID, to also preserve the signature pointers. --- lldb/include/lldb/Target/StackID.h | 9 +- lldb/source/API/SBFrame.cpp | 2 +- lldb/source/Expression/DWARFExpression.cpp | 2 +- lldb/source/Target/RegisterContextUnwind.cpp | 8 - lldb/source/Target/StackFrameList.cpp | 2 +- lldb/source/Target/StackID.cpp | 11 +- .../Makefile | 11 + .../TestArmPointerMetadataCFADwarfExpr.py | 35 +++ .../main.s | 226 ++++++++++++++++++ 9 files changed, 290 insertions(+), 16 deletions(-) create mode 100644 lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile create mode 100644 lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py create mode 100644 lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s diff --git a/lldb/include/lldb/Target/StackID.h b/lldb/include/lldb/Target/StackID.h index c2a5d733dcd69..18461533d648a 100644 --- a/lldb/include/lldb/Target/StackID.h +++ b/lldb/include/lldb/Target/StackID.h @@ -26,7 +26,11 @@ class StackID { lldb::addr_t GetPC() const { return m_pc; } - lldb::addr_t GetCallFrameAddress() const { return m_cfa; } + lldb::addr_t GetCallFrameAddressWithMetadata() const { + return m_cfa_with_metadata; + 
} + + lldb::addr_t GetCallFrameAddressWithoutMetadata() const { return m_cfa; } SymbolContextScope *GetSymbolContextScope() const { return m_symbol_scope; } @@ -62,6 +66,9 @@ class StackID { /// below) lldb::addr_t m_cfa = LLDB_INVALID_ADDRESS; + /// The cfa with metadata (i.e. prior to Process::FixAddress). + lldb::addr_t m_cfa_with_metadata = LLDB_INVALID_ADDRESS; + /// If nullptr, there is no block or symbol for this frame. If not nullptr, /// this will either be the scope for the lexical block for the frame, or the /// scope for the symbol. Symbol context scopes are always be unique pointers diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp index b6724bb0c4119..42dbed490a33d 100644 --- a/lldb/source/API/SBFrame.cpp +++ b/lldb/source/API/SBFrame.cpp @@ -267,7 +267,7 @@ lldb::addr_t SBFrame::GetCFA() const { } if (StackFrame *frame = exe_ctx->GetFramePtr()) - return frame->GetStackID().GetCallFrameAddress(); + return frame->GetStackID().GetCallFrameAddressWithoutMetadata(); return LLDB_INVALID_ADDRESS; } diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 332cf2c86024a..5040351f4975b 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -2195,7 +2195,7 @@ llvm::Expected DWARFExpression::Evaluate( // Note that we don't have to parse FDEs because this DWARF expression // is commonly evaluated with a valid stack frame. 
StackID id = frame->GetStackID(); - addr_t cfa = id.GetCallFrameAddress(); + addr_t cfa = id.GetCallFrameAddressWithMetadata(); if (cfa != LLDB_INVALID_ADDRESS) { stack.push_back(Scalar(cfa)); stack.back().SetValueType(Value::ValueType::LoadAddress); diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp index 787eb94be3b48..3b018c09b8b72 100644 --- a/lldb/source/Target/RegisterContextUnwind.cpp +++ b/lldb/source/Target/RegisterContextUnwind.cpp @@ -2039,8 +2039,6 @@ bool RegisterContextUnwind::ReadFrameAddress( reg_info, cfa_reg_contents, reg_info->byte_size, reg_value); if (error.Success()) { address = reg_value.GetAsUInt64(); - if (abi_sp) - address = abi_sp->FixCodeAddress(address); UnwindLogMsg( "CFA value via dereferencing reg %s (%d): reg has val 0x%" PRIx64 ", CFA value is 0x%" PRIx64, @@ -2062,8 +2060,6 @@ bool RegisterContextUnwind::ReadFrameAddress( RegisterNumber cfa_reg(m_thread, row_register_kind, fa.GetRegisterNumber()); if (ReadGPRValue(cfa_reg, cfa_reg_contents)) { - if (abi_sp) - cfa_reg_contents = abi_sp->FixDataAddress(cfa_reg_contents); if (cfa_reg_contents == LLDB_INVALID_ADDRESS || cfa_reg_contents == 0 || cfa_reg_contents == 1) { UnwindLogMsg( @@ -2100,9 +2096,6 @@ bool RegisterContextUnwind::ReadFrameAddress( dwarfexpr.Evaluate(&exe_ctx, this, 0, nullptr, nullptr); if (result) { address = result->GetScalar().ULongLong(); - if (ABISP abi_sp = m_thread.GetProcess()->GetABI()) - address = abi_sp->FixCodeAddress(address); - UnwindLogMsg("CFA value set by DWARF expression is 0x%" PRIx64, address); return true; @@ -2143,7 +2136,6 @@ bool RegisterContextUnwind::ReadFrameAddress( } case UnwindPlan::Row::FAValue::isConstant: { address = fa.GetConstant(); - address = m_thread.GetProcess()->FixDataAddress(address); UnwindLogMsg("CFA value set by constant is 0x%" PRIx64, address); return true; } diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 
fa5d159c0c91a..ccf874fc03ebd 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -449,7 +449,7 @@ bool StackFrameList::FetchFramesUpTo(uint32_t end_idx, } } else { unwind_frame_sp = m_frames.front(); - cfa = unwind_frame_sp->m_id.GetCallFrameAddress(); + cfa = unwind_frame_sp->m_id.GetCallFrameAddressWithoutMetadata(); } } else { // Check for interruption when building the frames. diff --git a/lldb/source/Target/StackID.cpp b/lldb/source/Target/StackID.cpp index f879276527dda..137c776a84d2f 100644 --- a/lldb/source/Target/StackID.cpp +++ b/lldb/source/Target/StackID.cpp @@ -17,7 +17,8 @@ using namespace lldb_private; StackID::StackID(lldb::addr_t pc, lldb::addr_t cfa, SymbolContextScope *symbol_scope, Process *process) - : m_pc(pc), m_cfa(cfa), m_symbol_scope(symbol_scope) { + : m_pc(pc), m_cfa(cfa), m_cfa_with_metadata(cfa), + m_symbol_scope(symbol_scope) { if (process) { m_pc = process->FixCodeAddress(m_pc); m_cfa = process->FixDataAddress(m_cfa); @@ -29,6 +30,7 @@ void StackID::SetPC(lldb::addr_t pc, Process *process) { } void StackID::SetCFA(lldb::addr_t cfa, Process *process) { + m_cfa_with_metadata = cfa; m_cfa = process ? 
process->FixDataAddress(cfa) : cfa; } @@ -49,7 +51,8 @@ void StackID::Dump(Stream *s) { } bool lldb_private::operator==(const StackID &lhs, const StackID &rhs) { - if (lhs.GetCallFrameAddress() != rhs.GetCallFrameAddress()) + if (lhs.GetCallFrameAddressWithoutMetadata() != + rhs.GetCallFrameAddressWithoutMetadata()) return false; SymbolContextScope *lhs_scope = lhs.GetSymbolContextScope(); @@ -67,8 +70,8 @@ bool lldb_private::operator!=(const StackID &lhs, const StackID &rhs) { } bool lldb_private::operator<(const StackID &lhs, const StackID &rhs) { - const lldb::addr_t lhs_cfa = lhs.GetCallFrameAddress(); - const lldb::addr_t rhs_cfa = rhs.GetCallFrameAddress(); + const lldb::addr_t lhs_cfa = lhs.GetCallFrameAddressWithoutMetadata(); + const lldb::addr_t rhs_cfa = rhs.GetCallFrameAddressWithoutMetadata(); // FIXME: We are assuming that the stacks grow downward in memory. That's not // necessary, but true on diff --git a/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile new file mode 100644 index 0000000000000..f0de8ffca59fc --- /dev/null +++ b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/Makefile @@ -0,0 +1,11 @@ +ASM_SOURCES := main.s + +# This is to appease Makefile.rules, there is no main.c +C_SOURCES := main.c + +ASM_OBJS := $(ASM_SOURCES:.s=.o) + +%.o: %.s + $(CC) -c -x assembler $< -o $@ + +include Makefile.rules diff --git a/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py new file mode 100644 index 0000000000000..839e0e1a4fc4d --- /dev/null +++ b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/TestArmPointerMetadataCFADwarfExpr.py @@ -0,0 +1,35 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +@skipUnlessDarwin 
+@skipIf(archs=no_match(["arm64"])) +class TestArmPointerMetadataStripping(TestBase): + def test(self): + self.build() + target, process, thread, bkpt = lldbutil.run_to_name_breakpoint(self, "foo") + + # Step over the first two instructions of foo in order to + # toggle the bit of fp and save it on the stack: + # orr x29, x29, #0x1000000000000000 + # stp x29, x30, [sp, #-16]! + # This is effectively adding metadata to the CFA of the caller frame (main). + thread.StepInstruction(False) + thread.StepInstruction(False) + + # The location of `argv` has been artificially made equal to the CFA of the frame. + # As such, it should have the metadata artificially set previously. + argv_addr = thread.frames[1].GetValueForVariablePath("&argv") + self.assertTrue(argv_addr.IsValid()) + argv_addr_uint = argv_addr.GetValueAsUnsigned() + self.assertNotEqual((argv_addr_uint & (1 << 60)), 0) + + # GetCFA strips metadata. + cfa = thread.frames[1].GetCFA() + self.assertEqual((cfa & (1 << 60)), 0) + + # If the test worked correctly, the cfa and the location should be identical, + # modulo the metadata. + self.assertEqual(cfa | (1 << 60), argv_addr_uint) diff --git a/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s new file mode 100644 index 0000000000000..0825c5ddd08b5 --- /dev/null +++ b/lldb/test/API/macosx/arm-pointer-metadata-cfa-dwarf-expr/main.s @@ -0,0 +1,226 @@ +; The assembly below corresponds to this program: +; __attribute__((nodebug)) +; int foo() { +; return 10; +; } +; int main(int argc, char **argv) { +; foo(); +; return 0; +; } +; +; The assembly was edited in two places (search for "EDIT"): +; 1. A "orr x29, x29, #0x1000000000000000" instruction was added in foo. This +; effectively changes the CFA value of the frame above foo (i.e. main). +; 2. In main, the DWARF location of `argv` was changed to DW_AT_call_frame_cfa. 
+; +; This allows us to stop in foo, go to frame 1 (main) and do `v &argv`, +; obtaining the result of evaluating DW_AT_call_frame_cfa. + + .section __TEXT,__text,regular,pure_instructions + .globl _foo ; -- Begin function foo + .p2align 2 +_foo: ; @foo +Lfunc_begin0: + .cfi_startproc + orr x29, x29, #0x1000000000000000 ; EDIT: Set top byte of fp. + stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill + mov x29, sp + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + mov w0, #10 ; =0xa + ldp x29, x30, [sp], #16 ; 16-byte Folded Reload + ret +Lfunc_end0: + .cfi_endproc + ; -- End function + .globl _main ; -- Begin function main + .p2align 2 +_main: ; @main +Lfunc_begin1: + .file 1 "/test" "test.c" + .loc 1 6 0 ; test.c:6:0 + .cfi_startproc + sub sp, sp, #48 + stp x29, x30, [sp, #32] ; 16-byte Folded Spill + add x29, sp, #32 + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + mov w8, #0 ; =0x0 + str w8, [sp, #12] ; 4-byte Folded Spill + stur wzr, [x29, #-4] + stur w0, [x29, #-8] + str x1, [sp, #16] +Ltmp0: + bl _foo + ldr w0, [sp, #12] ; 4-byte Folded Reload + ldp x29, x30, [sp, #32] ; 16-byte Folded Reload + add sp, sp, #48 + ret +Ltmp1: +Lfunc_end1: + .cfi_endproc + ; -- End function + .section __DWARF,__debug_abbrev,regular,debug +Lsection_abbrev: + .byte 1 ; Abbreviation Code + .byte 17 ; DW_TAG_compile_unit + .byte 1 ; DW_CHILDREN_yes + .byte 37 ; DW_AT_producer + .byte 14 ; DW_FORM_strp + .byte 19 ; DW_AT_language + .byte 5 ; DW_FORM_data2 + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .ascii "\202|" ; DW_AT_LLVM_sysroot + .byte 14 ; DW_FORM_strp + .ascii "\357\177" ; DW_AT_APPLE_sdk + .byte 14 ; DW_FORM_strp + .byte 16 ; DW_AT_stmt_list + .byte 23 ; DW_FORM_sec_offset + .byte 27 ; DW_AT_comp_dir + .byte 14 ; DW_FORM_strp + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 6 ; DW_FORM_data4 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 2 ; Abbreviation Code + .byte 46 ; DW_TAG_subprogram + 
.byte 1 ; DW_CHILDREN_yes + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 6 ; DW_FORM_data4 + .byte 64 ; DW_AT_frame_base + .byte 24 ; DW_FORM_exprloc + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 58 ; DW_AT_decl_file + .byte 11 ; DW_FORM_data1 + .byte 59 ; DW_AT_decl_line + .byte 11 ; DW_FORM_data1 + .byte 39 ; DW_AT_prototyped + .byte 25 ; DW_FORM_flag_present + .byte 73 ; DW_AT_type + .byte 19 ; DW_FORM_ref4 + .byte 63 ; DW_AT_external + .byte 25 ; DW_FORM_flag_present + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 3 ; Abbreviation Code + .byte 5 ; DW_TAG_formal_parameter + .byte 0 ; DW_CHILDREN_no + .byte 2 ; DW_AT_location + .byte 24 ; DW_FORM_exprloc + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 58 ; DW_AT_decl_file + .byte 11 ; DW_FORM_data1 + .byte 59 ; DW_AT_decl_line + .byte 11 ; DW_FORM_data1 + .byte 73 ; DW_AT_type + .byte 19 ; DW_FORM_ref4 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 4 ; Abbreviation Code + .byte 36 ; DW_TAG_base_type + .byte 0 ; DW_CHILDREN_no + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 62 ; DW_AT_encoding + .byte 11 ; DW_FORM_data1 + .byte 11 ; DW_AT_byte_size + .byte 11 ; DW_FORM_data1 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 5 ; Abbreviation Code + .byte 15 ; DW_TAG_pointer_type + .byte 0 ; DW_CHILDREN_no + .byte 73 ; DW_AT_type + .byte 19 ; DW_FORM_ref4 + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 0 ; EOM(3) + .section __DWARF,__debug_info,regular,debug +Lsection_info: +Lcu_begin0: +.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit + .long Lset0 +Ldebug_info_start0: + .short 4 ; DWARF version number +.set Lset1, Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. 
Section + .long Lset1 + .byte 8 ; Address Size (in bytes) + .byte 1 ; Abbrev [1] 0xb:0x76 DW_TAG_compile_unit + .long 0 ; DW_AT_producer + .short 12 ; DW_AT_language + .long 47 ; DW_AT_name + .long 54 ; DW_AT_LLVM_sysroot + .long 165 ; DW_AT_APPLE_sdk +.set Lset2, Lline_table_start0-Lsection_line ; DW_AT_stmt_list + .long Lset2 + .long 180 ; DW_AT_comp_dir + .quad Lfunc_begin1 ; DW_AT_low_pc +.set Lset3, Lfunc_end1-Lfunc_begin1 ; DW_AT_high_pc + .long Lset3 + .byte 2 ; Abbrev [2] 0x32:0x36 DW_TAG_subprogram + .quad Lfunc_begin1 ; DW_AT_low_pc +.set Lset4, Lfunc_end1-Lfunc_begin1 ; DW_AT_high_pc + .long Lset4 + .byte 1 ; DW_AT_frame_base + .byte 109 + .long 247 ; DW_AT_name + .byte 1 ; DW_AT_decl_file + .byte 6 ; DW_AT_decl_line + ; DW_AT_prototyped + .long 107 ; DW_AT_type + ; DW_AT_external + .byte 3 ; Abbrev [3] 0x4b:0xe DW_TAG_formal_parameter + .byte 2 ; DW_AT_location + .byte 145 + .byte 120 + .long 256 ; DW_AT_name + .byte 1 ; DW_AT_decl_file + .byte 6 ; DW_AT_decl_line + .long 103 ; DW_AT_type + .byte 3 ; Abbrev [3] 0x59:0xe DW_TAG_formal_parameter + .byte 1 ; DW_AT_location + .byte 0x9c ; EDIT: DW_AT_call_frame_cfa + .long 261 ; DW_AT_name + .byte 1 ; DW_AT_decl_file + .byte 6 ; DW_AT_decl_line + .long 110 ; DW_AT_type + .byte 0 ; End Of Children Mark + .byte 4 ; Abbrev [4] 0x68:0x7 DW_TAG_base_type + .long 252 ; DW_AT_name + .byte 5 ; DW_AT_encoding + .byte 4 ; DW_AT_byte_size + .byte 5 ; Abbrev [5] 0x6f:0x5 DW_TAG_pointer_type + .long 115 ; DW_AT_type + .byte 5 ; Abbrev [5] 0x74:0x5 DW_TAG_pointer_type + .long 120 ; DW_AT_type + .byte 4 ; Abbrev [4] 0x79:0x7 DW_TAG_base_type + .long 266 ; DW_AT_name + .byte 6 ; DW_AT_encoding + .byte 1 ; DW_AT_byte_size + .byte 0 ; End Of Children Mark +Ldebug_info_end0: + .section __DWARF,__debug_str,regular,debug +Linfo_string: + .asciz "Apple clang " ; string offset=0 + .asciz "test.c" ; string offset=47 + .asciz 
"/Applications/Xcode..........................................................................................." ; string offset=54 + .asciz ".............." ; string offset=165 + .asciz "......................................................../llvm_src1" ; string offset=180 + .asciz "main" ; string offset=247 + .asciz "int" ; string offset=252 + .asciz "argc" ; string offset=256 + .asciz "argv" ; string offset=261 + .asciz "char" ; string offset=266 +.subsections_via_symbols + .section __DWARF,__debug_line,regular,debug +Lsection_line: +Lline_table_start0: From 9b0d7ddb04665e76cfa90b5d69c6183b90772243 Mon Sep 17 00:00:00 2001 From: Charitha Saumya <136391709+charithaintc@users.noreply.github.com> Date: Fri, 12 Sep 2025 09:37:04 -0700 Subject: [PATCH 151/734] [mlir][xegpu] Add support for `vector.multi_reduction` and `vector.shape_cast` SIMT distribution. (#157560) Add support for distributing the `vector.multi_reduction` operation across lanes in a warp. Currently only 2D to 1D reductions are supported. Given layouts for the source and accumulator vectors, * If the reduction dimension is distributed across lanes, the reduction is non-lane-local and the reduction is done using warp shuffles. Here we simply rewrite the `MultiDimReductionOp` to a sequence of `ReductionOp`s inside the warp op body. Actual distribution will be done by `WarpOpReduction` pattern. * If the reduction dimension is not distributed across lanes, the reduction is lane-local. In this case, we yield the source and accumulator vectors from the warp op and perform the lane-local reduction outside the warp op using a sequence of `ReductionOp`s. PR also adds support for distributing `vector.shape_cast` based on layouts. 
--- .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 67 ++-- .../mlir/Dialect/XeGPU/Transforms/Passes.td | 4 + mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 46 ++- .../XeGPU/Transforms/XeGPUBlocking.cpp | 20 +- .../Transforms/XeGPUSubgroupDistribute.cpp | 341 +++++++++++++++++- .../Transforms/XeGPUWgToSgDistribute.cpp | 8 +- .../Dialect/XeGPU/subgroup-distribute.mlir | 113 ++++++ .../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 3 +- 8 files changed, 530 insertions(+), 72 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index cfe3e800484ce..1f1d367118365 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -194,26 +194,29 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { InterfaceMethod<"Get the num of effective subgroups", "int64_t", "getNumSubgroups", (ins), [{ - std::optional> sgLayout = llvm::cast(tablegen_opaque_val).getSgLayoutAsInt(); + std::optional> sgLayout = llvm::cast(tablegen_opaque_val).getEffectiveSgLayoutAsInt(); if (sgLayout.has_value()) return computeProduct(*sgLayout); return 0; }], [{}]>, - InterfaceMethod<"Get the SgLayout field of the attribute as integer array", + InterfaceMethod<"Get the order of the layout attribute", + "DenseI32ArrayAttr", + "getOrder">, + InterfaceMethod<"Get the effective SgLayout of the layout attribute as integer array", "SmallVector", - "getSgLayoutAsInt">, - InterfaceMethod<"Get the SgData field of the attribute as integer array", + "getEffectiveSgLayoutAsInt">, + InterfaceMethod<"Get the effective SgData of the layout attribute as integer array", "SmallVector", - "getSgDataAsInt">, - InterfaceMethod<"Get the InstData field of the attribute as integer array", + "getEffectiveSgDataAsInt">, + InterfaceMethod<"Get the effective InstData of the layout attribute as integer array", "SmallVector", - "getInstDataAsInt">, - InterfaceMethod<"Get the LaneLayout field of the 
attribute as integer array", + "getEffectiveInstDataAsInt">, + InterfaceMethod<"Get the effective LaneLayout of the layout attribute as integer array", "SmallVector", - "getLaneLayoutAsInt">, - InterfaceMethod<"Get the LaneData field of the attribute as integer array", + "getEffectiveLaneLayoutAsInt">, + InterfaceMethod<"Get the effective LaneData of the layout attribute as integer array", "SmallVector", - "getLaneDataAsInt">, + "getEffectiveLaneDataAsInt">, InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData", "xegpu::DistributeLayoutAttr", "dropSgLayoutAndData">, @@ -231,7 +234,11 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { multiple blocks according to round-robin distribution rules.}], "FailureOr>>", "getOffsets", - (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef":$shape)> + (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef":$shape)>, + InterfaceMethod ]; } @@ -391,31 +398,31 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { getLaneLayout(), getLaneData(), getOrder()); } - SmallVector getSgLayoutAsInt() const { + SmallVector getEffectiveSgLayoutAsInt() const { if (DenseI32ArrayAttr layout = getSgLayout()) return llvm::to_vector_of(layout.asArrayRef()); return {}; } - SmallVector getSgDataAsInt() const { + SmallVector getEffectiveSgDataAsInt() const { if (DenseI32ArrayAttr data = getSgData()) return llvm::to_vector_of(data.asArrayRef()); return {}; } - SmallVector getInstDataAsInt() const { + SmallVector getEffectiveInstDataAsInt() const { if (DenseI32ArrayAttr inst = getInstData()) return llvm::to_vector_of(inst.asArrayRef()); return {}; } - SmallVector getLaneLayoutAsInt() const { + SmallVector getEffectiveLaneLayoutAsInt() const { if (DenseI32ArrayAttr layout = getLaneLayout()) return llvm::to_vector_of(layout.asArrayRef()); return {}; } - SmallVector getLaneDataAsInt() const { + SmallVector getEffectiveLaneDataAsInt() const { if 
(DenseI32ArrayAttr data = getLaneData()) return llvm::to_vector_of(data.asArrayRef()); return {}; @@ -433,6 +440,9 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { FailureOr>> getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef shape); + /// Check if this is slice of some other layout. + bool isSliceOf(const xegpu::DistributeLayoutAttr &other) { return false; } + }]; let assemblyFormat = "`<` struct(params) `>`"; @@ -499,10 +509,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { /// Returns the SgLayout of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - SmallVector getSgLayoutAsInt() const { + SmallVector getEffectiveSgLayoutAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - auto layout = parent.getSgLayoutAsInt(); + auto layout = parent.getEffectiveSgLayoutAsInt(); if (layout.size()) { ArrayRef dims = attr.getDims().asArrayRef(); return XeGPUDialect::slice(ArrayRef(layout), dims); @@ -512,10 +522,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { /// Returns the SgData of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - SmallVector getSgDataAsInt() const { + SmallVector getEffectiveSgDataAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - auto data = parent.getSgDataAsInt(); + auto data = parent.getEffectiveSgDataAsInt(); if (data.size()) { ArrayRef dims = attr.getDims().asArrayRef(); return XeGPUDialect::slice(ArrayRef(data), dims); @@ -525,10 +535,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { /// Returns the InstData of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. 
- SmallVector getInstDataAsInt() const { + SmallVector getEffectiveInstDataAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - auto inst = parent.getInstDataAsInt(); + auto inst = parent.getEffectiveInstDataAsInt(); if (inst.size()) { ArrayRef dims = attr.getDims().asArrayRef(); return XeGPUDialect::slice(llvm::ArrayRef(inst), dims); @@ -538,10 +548,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { /// Returns the LaneLayout of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - SmallVector getLaneLayoutAsInt() const { + SmallVector getEffectiveLaneLayoutAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - auto layout = parent.getLaneLayoutAsInt(); + auto layout = parent.getEffectiveLaneLayoutAsInt(); if (layout.size()) { ArrayRef dims = attr.getDims().asArrayRef(); return XeGPUDialect::slice(llvm::ArrayRef(layout), dims); @@ -551,10 +561,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { /// Returns the LaneData of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - SmallVector getLaneDataAsInt() const { + SmallVector getEffectiveLaneDataAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - auto data = parent.getLaneDataAsInt(); + auto data = parent.getEffectiveLaneDataAsInt(); if (data.size()) { ArrayRef dims = attr.getDims().asArrayRef(); return XeGPUDialect::slice(llvm::ArrayRef(data), dims); @@ -594,6 +604,9 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { FailureOr>> getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef shape); + /// Check if this is slice of some other layout. 
+ bool isSliceOf(const xegpu::DistributeLayoutAttr &other); + }]; let assemblyFormat = "`<` qualified($parent) `,` `dims` `=` $dims `>`"; diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td index ddf6b4ac85a90..59dca9f0d852a 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td @@ -27,6 +27,10 @@ def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> { }]; let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect"]; + let options = [Option< + "enableSGReductions", "enable-sg-reductions", "bool", + /*default=*/"true", + "Enable subgroup reductions using subgroup shuffles.">]; } def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> { diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 7f3be7f91c56b..94c5509fd7c29 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -133,22 +133,23 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef shape, }; // check the sgLayout and sgData - auto maybeSgShape = - tryDistribute(shape, attr.getSgLayoutAsInt(), attr.getSgDataAsInt()); + auto maybeSgShape = tryDistribute(shape, attr.getEffectiveSgLayoutAsInt(), + attr.getEffectiveSgDataAsInt()); if (!maybeSgShape) return false; auto sgShape = maybeSgShape.value(); // check InstData, it neither have layout nor need round-robin auto maybeInstShape = - tryDistribute(sgShape, {}, attr.getInstDataAsInt(), false); + tryDistribute(sgShape, {}, attr.getEffectiveInstDataAsInt(), false); if (!maybeInstShape) return false; auto instShape = maybeInstShape.value(); // check LaneLayout and LaneData - auto maybeLaneShape = tryDistribute(instShape, attr.getLaneLayoutAsInt(), - attr.getLaneDataAsInt(), false); + auto maybeLaneShape = + tryDistribute(instShape, attr.getEffectiveLaneLayoutAsInt(), + 
attr.getEffectiveLaneDataAsInt(), false); return maybeLaneShape.has_value(); } @@ -282,9 +283,10 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, if (!hasDefaultOrder()) return mlir::emitError(loc, "order attribute is currently not supported."); - auto dims = llvm::map_to_vector(getSgLayoutAsInt(), [&](int64_t d) -> Value { - return builder.createOrFold(loc, d); - }); + auto dims = + llvm::map_to_vector(getEffectiveSgLayoutAsInt(), [&](int64_t d) -> Value { + return builder.createOrFold(loc, d); + }); return affine::delinearizeIndex(builder, loc, linearId, dims); } @@ -298,8 +300,8 @@ LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, if (!isForWorkgroup()) return failure(); - SmallVector sgLayout = getSgLayoutAsInt(); - SmallVector sgShape = getSgDataAsInt(); + SmallVector sgLayout = getEffectiveSgLayoutAsInt(); + SmallVector sgShape = getEffectiveSgDataAsInt(); if (sgShape.empty()) { if (auto derivedShape = computeShapeRatio(shape, sgLayout)) sgShape = derivedShape.value(); @@ -385,8 +387,8 @@ SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, if (!isForWorkgroup()) return failure(); - SmallVector sgLayout = getSgLayoutAsInt(); - SmallVector sgShape = getSgDataAsInt(); + SmallVector sgLayout = getEffectiveSgLayoutAsInt(); + SmallVector sgShape = getEffectiveSgDataAsInt(); if (sgShape.empty()) { if (auto derivedShape = computeShapeRatio(shape, sgLayout)) sgShape = derivedShape.value(); @@ -409,6 +411,26 @@ SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, shape); } +bool SliceAttr::isSliceOf(const xegpu::DistributeLayoutAttr &other) { + auto flattenedThis = flatten(); + // If other is a LayoutAttr, just compare directly with parent of + // flattenedThis. + if (auto otherLayout = dyn_cast(other)) + return flattenedThis.getParent() == otherLayout; + // If other is a SliceAttr, flatten it first before comparing. 
+ auto flattenedOther = dyn_cast(other).flatten(); + // Both must have common parent LayoutAttr. + if (flattenedThis.getParent() != flattenedOther.getParent()) + return false; + // otherFlattened's sliced dims must be a subset of flattenedThis's sliced + // dims. + llvm::SmallDenseSet thisDims( + flattenedThis.getDims().asArrayRef().begin(), + flattenedThis.getDims().asArrayRef().end()); + return llvm::all_of(flattenedOther.getDims().asArrayRef(), + [&](int64_t dim) { return thisDims.contains(dim); }); +} + //===----------------------------------------------------------------------===// // XeGPU_RangeAttr //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index 5d5ff69e06886..7efa4b9fbd934 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -85,16 +85,16 @@ struct ConvertLayoutOpPattern using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op, PatternRewriter &rewriter) const override { - xegpu::DistributeLayoutAttr input_layout = op.getInputLayoutAttr(); - xegpu::DistributeLayoutAttr target_layout = op.getTargetLayoutAttr(); - if (input_layout.getInstDataAsInt().empty() || - target_layout.getInstDataAsInt().empty()) + xegpu::DistributeLayoutAttr inputLayout = op.getInputLayoutAttr(); + xegpu::DistributeLayoutAttr targetLayout = op.getTargetLayoutAttr(); + if (inputLayout.getEffectiveInstDataAsInt().empty() || + targetLayout.getEffectiveInstDataAsInt().empty()) return rewriter.notifyMatchFailure(op, "Not a target ConvertLayoutOp."); - input_layout = input_layout.dropInstData(); - target_layout = target_layout.dropInstData(); + inputLayout = inputLayout.dropInstData(); + targetLayout = targetLayout.dropInstData(); auto newOp = rewriter.createOrFold( - op.getLoc(), op.getType(), op.getSource(), input_layout, 
target_layout); + op.getLoc(), op.getType(), op.getSource(), inputLayout, targetLayout); rewriter.replaceOp(op, newOp); return success(); } @@ -145,8 +145,8 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(operandOrResult); if (layout && layout.isForSubgroup()) { - if (!layout.getInstDataAsInt().empty()) - return layout.getInstDataAsInt(); + if (!layout.getEffectiveInstDataAsInt().empty()) + return layout.getEffectiveInstDataAsInt(); if (auto type = dyn_cast(value.getType())) return llvm::to_vector(type.getShape()); @@ -226,7 +226,7 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const { Type valTy = value.getType(); if (auto tdescTy = dyn_cast(valTy)) { xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr(); - return layout && !layout.getInstDataAsInt().empty(); + return layout && !layout.getEffectiveInstDataAsInt().empty(); } auto shapedType = dyn_cast(valTy); return shapedType && !llvm::equal(tileShape, shapedType.getShape()); diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index b33669259249a..21c1583bf2633 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -58,6 +58,12 @@ namespace { // SIMT Distribution Patterns //===----------------------------------------------------------------------===// +/// In certain cases, we may need to favor XeGPU specific distribution patterns +/// over generic vector distribution patterns. In such cases, we can assign +/// priorities to patterns. +static constexpr unsigned regularPatternBenefit = 1; +static constexpr unsigned highPatternBenefit = 2; + /// Helper function to get distributed vector type for a source vector type /// according to the lane_layout. 
We simply divide each dimension of tensor /// descriptor shape by corresponding lane_layout dimension. If @@ -72,27 +78,31 @@ namespace { /// | 32x16 | [2, 8] | 16x2 | /// | 2x32x16 | [1, 16] | 2x32x1 | static FailureOr -getDistVecTypeBasedOnLaneLayout(xegpu::LayoutAttr layout, +getDistVecTypeBasedOnLaneLayout(xegpu::DistributeLayoutAttr layout, VectorType originalType) { if (!layout) return failure(); - - auto laneLayout = layout.getLaneLayout().asArrayRef(); - assert(originalType.getShape().size() >= laneLayout.size() && + assert((isa(layout) || isa(layout)) && + "Expecting a valid layout."); + SmallVector effectiveLaneLayout = + layout.getEffectiveLaneLayoutAsInt(); + assert(static_cast(originalType.getRank()) >= + effectiveLaneLayout.size() && "Rank of the original vector type should be greater or equal to the " "size of the lane layout to distribute the vector type."); SmallVector distributedShape(originalType.getShape()); // Only distribute the last `laneLayout.size()` dimensions. The remaining // dimensions are not distributed. - unsigned distributionStart = originalType.getRank() - laneLayout.size(); + unsigned distributionStart = + originalType.getRank() - effectiveLaneLayout.size(); for (auto [i, dim] : llvm::enumerate(originalType.getShape())) { if (i < distributionStart) continue; // Check if the dimension can be distributed evenly. - if (dim % laneLayout[i - distributionStart] != 0) + if (dim % effectiveLaneLayout[i - distributionStart] != 0) return failure(); - distributedShape[i] = dim / laneLayout[i - distributionStart]; + distributedShape[i] = dim / effectiveLaneLayout[i - distributionStart]; } return VectorType::get(distributedShape, originalType.getElementType()); } @@ -1001,12 +1011,282 @@ struct LoadDistribution final : public gpu::WarpDistributionPattern { } }; +/// Helper to rewrite a 2D VectorMultiReductionOp into a sequence of 1D +/// VectorReductionOps. 
+static Value lowerToVectorReductions(TypedValue src, + TypedValue acc, + vector::CombiningKind kind, + int64_t reductionDim, Location loc, + PatternRewriter &rewriter) { + // Expecting a 2D source vector. + assert(src.getType().getRank() == 2 && "expected a 2D source vector"); + VectorType sourceType = src.getType(); + int64_t sourceH = sourceType.getShape()[0]; + int64_t sourceW = sourceType.getShape()[1]; + int nSlices = (reductionDim == 0) ? sourceW : sourceH; + // Create a constant vector to hold the result of the reduction. + TypedAttr zeroAttr = rewriter.getZeroAttr(sourceType.getElementType()); + Value reductionResult = arith::ConstantOp::create( + rewriter, loc, acc.getType(), + DenseElementsAttr::get(acc.getType(), zeroAttr)); + // For each slice of the source, extract the slice vector, do a reduction + // and, insert the reduced value back to the result vector. + for (int i = 0; i < nSlices; ++i) { + SmallVector sliceOffsets, sliceSizes; + if (reductionDim == 1) { + sliceOffsets = {i, 0}; + sliceSizes = {1, sourceW}; + } else { + sliceOffsets = {0, i}; + sliceSizes = {sourceH, 1}; + } + vector::ExtractStridedSliceOp extractOp = + vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets, + sliceSizes, {1, 1}); + int64_t nSliceElements = extractOp.getResult().getType().getNumElements(); + Value slice = vector::ShapeCastOp::create( + rewriter, loc, + VectorType::get({nSliceElements}, sourceType.getElementType()), + extractOp.getResult()); + Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, i); + Value reduction = + vector::ReductionOp::create(rewriter, loc, kind, slice, accExtract); + reductionResult = + vector::InsertOp::create(rewriter, loc, reduction, reductionResult, i); + } + return reductionResult; +} + +/// This patterns distribute the `vector.multi_reduction` operation across +/// lanes in a warp. Currently only 2D to 1D reductions are supported. 
Given +/// layouts for the source and accumulator vectors, +/// * If the reduction dimension is distributed across lanes, the reduction is +/// non-lane-local and the reduction is done using warp shuffles. Here we +/// simply rewrite the MultiDimReductionOp to a sequence of ReductionOps in +/// the warp op body. +/// * If the reduction dimension is not distributed across lanes, the reduction +/// is lane-local. In this case, we yield the source and accumulator vectors +/// from the warp op and perform the lane-local reduction outside the warp op +/// using a sequence of ReductionOps. +/// Example 1 (Reduction is lane-local): +/// ``` +/// %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1xf32>) { +/// %0 = "some_def"() : () -> (vector<16x32xf32>) +/// %acc = "some_def"() : () -> (vector<32xf32>) +/// %1 = vector.multi_reduction , %0, %acc [0] : vector<16x32xf32> to +/// vector<32xf32> gpu.yield %1 : vector<32xf32> +/// } +/// ``` +/// is lowered to: +/// ``` +/// %r:2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<16x1xf32>, +/// vector<1xf32>) { +/// %0 = "some_def"() : () -> (vector<16x32xf32>) +/// %acc = "some_def"() : () -> (vector<32xf32>) +/// gpu.yield %0, %acc : vector<16x32xf32>, vector<32xf32> +/// } +/// %c = arith.constant dense<0.0> : vector<1xf32> +/// %1 = vector.shape_cast %r#0 : vector<16x1xf32> to vector<16xf32> +/// %2 = vector.reduction , %1, %r#1 : vector<16xf32> to f32 +/// %3 = vector.insert %2, %c[0] : f32 into vector<1xf32> +/// ``` +/// Example 2 (Reduction is non-lane-local): +/// ``` +/// %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<2xf32>) { +/// %0 = "some_def"() : () -> (vector<2x32xf32>) +/// %acc = "some_def"() : () -> (vector<2xf32>) +/// %1 = vector.multi_reduction , %0, %acc [1] : vector<2x32xf32> to +/// vector<2xf32> +/// gpu.yield %1 : vector<2xf32> +/// } +/// ``` +/// is lowered to: +/// ``` +/// %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<2xf32>) { +/// %0 = "some_def"() : () -> 
(vector<2x32xf32>) +/// %acc = "some_def"() : () -> (vector<2xf32>) +/// %1 = arith.constant dense<0.0> : vector<2xf32> +/// %2 = vector.extract %0[0] : vector<32xf32> from > +/// %3 = ("warp.reduction %2") : f32 +/// %4 = vector.insert %3, %1[0] : f32 into vector<2xf32> +/// ... repeat for row 1 +/// gpu.yield %1 : vector<2xf32> +/// } +struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern { + using gpu::WarpDistributionPattern::WarpDistributionPattern; + LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp, + PatternRewriter &rewriter) const override { + OpOperand *yieldOperand = + getWarpResult(warpOp, llvm::IsaPred); + if (!yieldOperand) + return failure(); + auto reductionOp = + cast(yieldOperand->get().getDefiningOp()); + unsigned operandNumber = yieldOperand->getOperandNumber(); + VectorType sourceType = reductionOp.getSourceVectorType(); + // Only 2D vectors are supported. + if (sourceType.getRank() != 2) + return rewriter.notifyMatchFailure(warpOp, + "Only 2D reductions are supported."); + ArrayRef reductionDims = reductionOp.getReductionDims(); + // Only 1 reduction dimension supported. This also ensures that the result + // is vector type. + if (reductionDims.size() != 1) + return rewriter.notifyMatchFailure( + warpOp, "Only 1 reduction dimension is supported."); + int64_t reductionDim = reductionDims[0]; + VectorType distributedResultType = + cast(warpOp.getResult(operandNumber).getType()); + VectorType resultType = cast(reductionOp.getType()); + xegpu::DistributeLayoutAttr sourceLayout = + xegpu::getDistributeLayoutAttr(reductionOp.getSource()); + + FailureOr sourceDistTypeOrFailure = + getDistVecTypeBasedOnLaneLayout(sourceLayout, sourceType); + if (failed(sourceDistTypeOrFailure)) + return rewriter.notifyMatchFailure( + warpOp, "Failed to distribute the source vector type."); + VectorType sourceDistType = sourceDistTypeOrFailure.value(); + // Only single dimension distribution is supported. 
+ bool dim0Distributed = + sourceDistType.getShape()[0] != sourceType.getShape()[0]; + bool dim1Distributed = + sourceDistType.getShape()[1] != sourceType.getShape()[1]; + if (dim0Distributed && dim1Distributed) + return rewriter.notifyMatchFailure( + warpOp, "Expecting source to be distributed in a single dimension."); + int64_t sourceDistDim = dim0Distributed ? 0 : (dim1Distributed ? 1 : -1); + if (sourceDistDim == -1) + return rewriter.notifyMatchFailure( + warpOp, "Expecting a distributed source vector."); + bool resultDistributed = + distributedResultType.getNumElements() < resultType.getNumElements(); + // If the lane owns all the data required for reduction (i.e. reduction is + // fully parallel accross lanes), then each lane owns part of the result + // (i.e. result is distributed). If the reduction require cross-lane + // shuffling, then the result is shared among all lanes (broadcasted). + // Therefore we expect following cases: + // + // | Source vector | Reduction dim | Result vector | + // |----------------------|----------------|----------------| + // | dim-0 distributed | 0 | broadcasted | + // | dim-0 distributed | 1 | distributed | + // | dim-1 distributed | 0 | distributed | + // | dim-1 distributed | 1 | broadcasted | + + bool isReductionLaneLocal = (sourceDistDim == 0 && reductionDim == 1) || + (sourceDistDim == 1 && reductionDim == 0); + if (isReductionLaneLocal && !resultDistributed) + return rewriter.notifyMatchFailure( + warpOp, "Expecting a distributed result for lane-local reduction."); + + if (!isReductionLaneLocal && resultDistributed) + return rewriter.notifyMatchFailure( + warpOp, + "Expecting a broadcasted result for non-lane-local reduction."); + + // Handle lane-local reduction case. In this case we fully distribute the + // reduction result. + if (isReductionLaneLocal) { + // Yield the source and acc vectors from the WarpOp. 
+ SmallVector newRetIndices; + auto newWarpOp = moveRegionToNewWarpOpAndAppendReturns( + rewriter, warpOp, {reductionOp.getSource(), reductionOp.getAcc()}, + {sourceDistType, distributedResultType}, newRetIndices); + rewriter.setInsertionPointAfter(newWarpOp); + Value result = lowerToVectorReductions( + cast>(newWarpOp->getResult(newRetIndices[0])), + cast>(newWarpOp->getResult(newRetIndices[1])), + reductionOp.getKind(), reductionDim, reductionOp.getLoc(), rewriter); + // Replace the warp op result with the final result. + rewriter.replaceAllUsesWith(reductionOp.getResult(), result); + return success(); + } + // For non-lane-local case, we simply rewrite the MultiReductionOp in terms + // of multiple ReductionOps. Actual distribution is done by the + // WarpOpReduction pattern. + rewriter.setInsertionPointAfter(reductionOp); + Value result = lowerToVectorReductions( + cast>(reductionOp.getSource()), + cast>(reductionOp.getAcc()), + reductionOp.getKind(), reductionDim, reductionOp.getLoc(), rewriter); + // Replace the warp op result with the final result. + rewriter.replaceAllUsesWith(reductionOp.getResult(), result); + return success(); + } +}; + +/// Distribute a `vector.shape_cast` op feeding into yield op of an enclosing +/// `gpu.warp_execute_on_lane_0` region. 
+struct VectorShapeCastDistribution : public gpu::WarpDistributionPattern { + using gpu::WarpDistributionPattern::WarpDistributionPattern; + LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp, + PatternRewriter &rewriter) const override { + OpOperand *yieldOperand = + getWarpResult(warpOp, llvm::IsaPred); + if (!yieldOperand) + return failure(); + auto shapeCastOp = + cast(yieldOperand->get().getDefiningOp()); + unsigned operandNumber = yieldOperand->getOperandNumber(); + auto resultDistTy = + cast(warpOp.getResult(operandNumber).getType()); + xegpu::DistributeLayoutAttr sourceLayout = + xegpu::getDistributeLayoutAttr(shapeCastOp.getSource()); + xegpu::DistributeLayoutAttr resultLayout = + xegpu::getDistributeLayoutAttr(shapeCastOp.getResult()); + if (!sourceLayout || !resultLayout) + return rewriter.notifyMatchFailure( + warpOp, + "the source or result of shape_cast op lacks distribution layout"); + + // For rank reducing or increasing shape_cast ops, the lower rank layout + // must be a slice of higher rank layout. + int64_t sourceRank = shapeCastOp.getSourceVectorType().getRank(); + int64_t resultRank = shapeCastOp.getResultVectorType().getRank(); + if (sourceRank < resultRank && !sourceLayout.isSliceOf(resultLayout)) + return rewriter.notifyMatchFailure( + warpOp, "shape_cast is rank reducing but source layout is not a " + "slice of result layout"); + if (sourceRank > resultRank && !resultLayout.isSliceOf(sourceLayout)) + return rewriter.notifyMatchFailure( + warpOp, "shape_cast is rank increasing but result layout is not a " + "slice of source layout"); + + FailureOr sourceDistTypeOrFailure = + getDistVecTypeBasedOnLaneLayout(sourceLayout, + shapeCastOp.getSourceVectorType()); + if (failed(sourceDistTypeOrFailure)) + return rewriter.notifyMatchFailure( + warpOp, "failed to get distributed vector type for source"); + VectorType sourceDistType = sourceDistTypeOrFailure.value(); + // Create a new warp op that yields the source of the shape_cast op. 
+ SmallVector newRetIndices; + auto newWarpOp = moveRegionToNewWarpOpAndAppendReturns( + rewriter, warpOp, {shapeCastOp.getSource()}, {sourceDistType}, + newRetIndices); + rewriter.setInsertionPointAfter(newWarpOp); + Value source = newWarpOp.getResult(newRetIndices[0]); + // Create a new shape_cast op outside the warp op. + Value newShapeCast = vector::ShapeCastOp::create( + rewriter, shapeCastOp.getLoc(), resultDistTy, source); + rewriter.replaceAllUsesWith(newWarpOp.getResult(operandNumber), + newShapeCast); + return success(); + } +}; + } // namespace namespace { struct XeGPUSubgroupDistributePass final : public xegpu::impl::XeGPUSubgroupDistributeBase< XeGPUSubgroupDistributePass> { + XeGPUSubgroupDistributePass() = default; + XeGPUSubgroupDistributePass(const XeGPUSubgroupDistributePass &other) = + default; + XeGPUSubgroupDistributePass(xegpu::XeGPUSubgroupDistributeOptions options) + : XeGPUSubgroupDistributeBase(options) {} void runOnOperation() override; }; } // namespace @@ -1016,8 +1296,13 @@ void xegpu::populateXeGPUSubgroupDistributePatterns( patterns .add( - patterns.getContext()); + GpuBarrierDistribution, VectorMultiReductionDistribution, + LoadDistribution, StoreDistribution>( + patterns.getContext(), + /*pattern benefit=*/regularPatternBenefit); + patterns.add( + patterns.getContext(), + /*pattern benefit=*/highPatternBenefit); } void XeGPUSubgroupDistributePass::runOnOperation() { @@ -1032,8 +1317,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() { if (!isa(operand.get().getType())) continue; - auto layout = - xegpu::getDistributeLayoutAttrOfType(operand); + auto layout = xegpu::getDistributeLayoutAttr(operand.get()); if (!layout) { op->emitError("Could not find layout attribute for operand ") << operand.getOperandNumber() << " of operation " << op->getName(); @@ -1074,18 +1358,15 @@ void XeGPUSubgroupDistributePass::runOnOperation() { if (vecRank == 0) return AffineMap::get(val.getContext()); // Get the layout of the vector type. 
- // TODO: support more layout types - auto layout = xegpu::getDistributeLayoutAttrOfType(val); + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(val); // If no layout is specified, assume the inner most dimension is distributed // for now. if (!layout) return AffineMap::getMultiDimMapWithTargets( vecRank, {static_cast(vecRank - 1)}, val.getContext()); SmallVector distributedDims; - // Get the distributed dimensions based on the layout. - ArrayRef laneLayout = layout.getLaneLayout().asArrayRef(); - for (unsigned i = 0; i < laneLayout.size(); ++i) { - if (laneLayout[i] > 1) + for (auto [i, v] : llvm::enumerate(layout.getEffectiveLaneLayoutAsInt())) { + if (v > 1) distributedDims.push_back(i); } return AffineMap::getMultiDimMapWithTargets(vecRank, distributedDims, @@ -1094,8 +1375,32 @@ void XeGPUSubgroupDistributePass::runOnOperation() { // TODO: shuffleFn is not used. auto shuffleFn = [](Location loc, OpBuilder &builder, Value val, Value srcIdx, int64_t warpSz) { return Value(); }; + + auto warpReduction = [](Location loc, OpBuilder &builder, Value input, + vector::CombiningKind kind, uint32_t size) { + // First reduce on a single thread to get per lane reduction value. + Value laneVal = builder.create(loc, kind, input); + // Parallel reduction using butterfly shuffles. 
+ for (uint64_t i = 1; i < size; i <<= 1) { + Value shuffled = + builder + .create(loc, laneVal, i, + /*width=*/size, + /*mode=*/gpu::ShuffleMode::XOR) + .getShuffleResult(); + laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled); + } + return laneVal; + }; + + if (enableSGReductions) + vector::populateDistributeReduction( + patterns, warpReduction, + /*pattern benefit=*/regularPatternBenefit); + vector::populatePropagateWarpVectorDistributionPatterns( - patterns, distributionFn, shuffleFn); + patterns, distributionFn, shuffleFn, + /*pattern benefit=*/regularPatternBenefit); if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) { signalPassFailure(); return; diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 5d0f1d18402f2..3f48400fedf5e 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -52,9 +52,9 @@ getSgShapeAndCount(ArrayRef shape, int count = 1; SmallVector sgShape(shape); if (layout && layout.isForWorkgroup()) { - SmallVector sgLayout = layout.getSgLayoutAsInt(); - if (!layout.getSgDataAsInt().empty()) - sgShape = layout.getSgDataAsInt(); + SmallVector sgLayout = layout.getEffectiveSgLayoutAsInt(); + if (!layout.getEffectiveSgDataAsInt().empty()) + sgShape = layout.getEffectiveSgDataAsInt(); else if (auto maybeDerivedSgData = computeShapeRatio(shape, sgLayout)) sgShape = *maybeDerivedSgData; SmallVector distUnit = computeElementwiseMul(sgLayout, sgShape); @@ -488,7 +488,7 @@ struct WgToSgVectorBroadcastOp VectorType::get(sgShape, resultType.getElementType()); // Check if the output layout is distributable - SmallVector sgLayout = layout.getSgLayoutAsInt(); + SmallVector sgLayout = layout.getEffectiveSgLayoutAsInt(); if (sgLayout.empty()) return failure(); diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir 
b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir index 60acea06c9a12..30ca9816df5bc 100644 --- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir +++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir @@ -1,5 +1,8 @@ // RUN: mlir-opt -xegpu-subgroup-distribute -allow-unregistered-dialect -canonicalize -cse -split-input-file %s | FileCheck %s +// RUN: mlir-opt -xegpu-subgroup-distribute="enable-sg-reductions=false" -allow-unregistered-dialect \ +// RUN: -canonicalize -cse -split-input-file %s | FileCheck %s --check-prefix=CHECK-REDUCTION + // CHECK-LABEL: gpu.func @store_nd_1d // CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>) { // CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<1xf32> @@ -320,6 +323,116 @@ gpu.module @test { } } +// ----- +// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction +// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> +// CHECK-SAME: (!xegpu.tensor_desc<1x32xf32, #xegpu.layout>, vector<16x2xf32>) { +// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout} : () -> vector<16x32xf32> +// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<1x32xf32, #xegpu.layout>, vector<16x32xf32> +// CHECK-NEXT: } +// CHECK: %[[COL0:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32> +// CHECK-NEXT: %[[RED0:.*]] = vector.reduction , %[[CAST0]], %{{.*}} : vector<16xf32> into f32 +// CHECK: %[[COL1:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32> +// CHECK-NEXT: %[[RED1:.*]] = vector.reduction , %[[CAST1]], %{{.*}} : vector<16xf32> into f32 +// CHECK-NEXT: vector.from_elements %[[RED0]], %[[RED1]] : 
vector<2xf32> +gpu.module @test { +gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction() { + %0 = "some_def"() : () -> !xegpu.tensor_desc<1x32xf32, #xegpu.layout> + %src = "some_def"() {layout_result_0 = #xegpu.layout} : () -> (vector<16x32xf32>) + %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.0> : vector<32xf32> + %1 = vector.multi_reduction , %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] + : vector<16x32xf32> to vector<32xf32> + %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout} + : vector<32xf32> to vector<1x32xf32> + xegpu.store_nd %3, %0 : vector<1x32xf32>, !xegpu.tensor_desc<1x32xf32, #xegpu.layout> + gpu.return +} +} + +// ----- +// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction +// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<2x16xf32, +// CHECK-REDUCTION-SAME: #xegpu.layout>, f32, f32) { +// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout} : () -> vector<2x16xf32> +// CHECK-REDUCTION-NEXT: %[[ROW0:.*]] = vector.extract %[[SRC]][0] : vector<16xf32> from vector<2x16xf32> +// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction , %[[ROW0]], %{{.*}} : vector<16xf32> into f32 +// CHECK-REDUCTION-NEXT: %[[ROW1:.*]] = vector.extract %[[SRC]][1] : vector<16xf32> from vector<2x16xf32> +// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction , %[[ROW1]], %{{.*}} : vector<16xf32> into f32 +// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<2x16xf32, #xegpu.layout>, f32, f32 +// CHECK-REDUCTION-NEXT: } +// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32> +gpu.module @test { +gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction() { + %0 = "some_def"() : () -> !xegpu.tensor_desc<2x16xf32, #xegpu.layout> + %src = "some_def"() {layout_result_0 = #xegpu.layout} : () -> (vector<2x16xf32>) + %acc = 
arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1]>} dense<0.0> : vector<2xf32> + %1 = vector.multi_reduction , %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1]>} + [1] : vector<2x16xf32> to vector<2xf32> + %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout} + : vector<2xf32> to vector<2x1xf32> + %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout} : vector<2x1xf32> to vector<2x16xf32> + xegpu.store_nd %4, %0 : vector<2x16xf32>, !xegpu.tensor_desc<2x16xf32, #xegpu.layout> + gpu.return +} +} + +// ----- +// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction +// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%0)[16] -> +// CHECK-SAME: (!xegpu.tensor_desc<32x1xf32, #xegpu.layout>, vector<2x16xf32>) { +// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout} : () -> vector<32x16xf32> +// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<32x1xf32, #xegpu.layout>, vector<32x16xf32> +// CHECK-NEXT: } +// CHECK: %[[ROW0:.*]] = vector.extract %[[W]]#1[0] : vector<16xf32> from vector<2x16xf32> +// CHECK-NEXT: %[[R0:.*]] = vector.reduction , %[[ROW0]], %{{.*}} : vector<16xf32> into f32 +// CHECK: %[[ROW1:.*]] = vector.extract %[[W]]#1[1] : vector<16xf32> from vector<2x16xf32> +// CHECK-NEXT: %[[R1:.*]] = vector.reduction , %[[ROW1]], %{{.*}} : vector<16xf32> into f32 +// CHECK-NEXT: vector.from_elements %[[R0]], %[[R1]] : vector<2xf32> +gpu.module @test { +gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction() { + %0 = "some_def"() : () -> !xegpu.tensor_desc<32x1xf32, #xegpu.layout> + %src = "some_def"() {layout_result_0 = #xegpu.layout} : () -> (vector<32x16xf32>) + %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1]>} dense<0.0> : vector<32xf32> + %1 = vector.multi_reduction , %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1]>} [1] + : vector<32x16xf32> to vector<32xf32> + %3 = vector.shape_cast %1 
{layout_result_0 = #xegpu.layout} + : vector<32xf32> to vector<32x1xf32> + xegpu.store_nd %3, %0 : vector<32x1xf32>, !xegpu.tensor_desc<32x1xf32, #xegpu.layout> + gpu.return +} +} + +// ----- +// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction +// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<16x2xf32, +// CHECK-REDUCTION-SAME: #xegpu.layout>, f32, f32) { +// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout} : () -> vector<16x2xf32> +// CHECK-REDUCTION-NEXT: %[[COL0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK-REDUCTION-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32> +// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction , %[[CAST0]], %{{.*}} : vector<16xf32> into f32 +// CHECK-REDUCTION-NEXT: %[[COL1:.*]] = vector.extract_strided_slice %5 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK-REDUCTION-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32> +// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction , %[[CAST1]], %cst : vector<16xf32> into f32 +// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<16x2xf32, #xegpu.layout>, f32, f32 +// CHECK-REDUCTION-NEXT: } +// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32> +gpu.module @test { +gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction() { + %0 = "some_def"() : () -> !xegpu.tensor_desc<16x2xf32, #xegpu.layout> + %src = "some_def"() {layout_result_0 = #xegpu.layout} : () -> (vector<16x2xf32>) + %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.0> : vector<2xf32> + %1 = vector.multi_reduction , %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} + [0] : 
vector<16x2xf32> to vector<2xf32> + %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout} + : vector<2xf32> to vector<1x2xf32> + %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout} : vector<1x2xf32> to vector<16x2xf32> + xegpu.store_nd %4, %0 : vector<16x2xf32>, !xegpu.tensor_desc<16x2xf32, #xegpu.layout> + gpu.return +} +} + // ----- // CHECK-LABEL: gpu.func @scatter_ops_chunksize({{.*}}) { // CHECK: %[[MASK:.*]] = arith.constant dense : vector<1xi1> diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp index 200323c7a4e51..e1ba45c60ac36 100644 --- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp +++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp @@ -170,7 +170,8 @@ class TestStepOpPattern : public OpConversionPattern { if (!sliceAttr || sliceAttr.getRank() != 1) return failure(); - std::optional> sgShape = sliceAttr.getSgDataAsInt(); + std::optional> sgShape = + sliceAttr.getEffectiveSgDataAsInt(); if (!sgShape) return failure(); From f3b712f6e4e9afed735962c6b96e0a2cadb03dc1 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 12 Sep 2025 17:37:30 +0100 Subject: [PATCH 152/734] [MLIR] Add debug log to the pass manager (NFC) (#156205) --- mlir/lib/Pass/Pass.cpp | 244 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 214 insertions(+), 30 deletions(-) diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 7094c8e279f2d..521c7c6be17b6 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -21,11 +21,14 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/DebugLog.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" #include +#define DEBUG_TYPE "pass-manager" + using namespace mlir; using namespace mlir::detail; @@ -242,6 +245,7 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) { }; // Walk the pass list 
and merge adjacent adaptors. + LDBG(3) << "Merging adjacent adaptors in pass list"; OpToOpPassAdaptor *lastAdaptor = nullptr; for (auto &pass : passes) { // Check to see if this pass is an adaptor. @@ -249,18 +253,26 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) { // If it is the first adaptor in a possible chain, remember it and // continue. if (!lastAdaptor) { + LDBG(3) << "Found first adaptor in chain"; lastAdaptor = currentAdaptor; continue; } // Otherwise, try to merge into the existing adaptor and delete the // current one. If merging fails, just remember this as the last adaptor. - if (succeeded(currentAdaptor->tryMergeInto(ctx, *lastAdaptor))) + LDBG(3) << "Attempting to merge adaptor with " + << currentAdaptor->getPassManagers().size() + << " managers into previous adaptor"; + if (succeeded(currentAdaptor->tryMergeInto(ctx, *lastAdaptor))) { + LDBG(3) << "Successfully merged adaptors, removing current one"; pass.reset(); - else + } else { + LDBG(3) << "Failed to merge adaptors, keeping current as last"; lastAdaptor = currentAdaptor; + } } else if (lastAdaptor) { // If this pass isn't an adaptor, finalize it and forget the last adaptor. + LDBG(3) << "Finalizing adaptor chain before non-adaptor pass"; if (failed(finalizeAdaptor(lastAdaptor))) return failure(); lastAdaptor = nullptr; @@ -273,15 +285,26 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) { // Now that the adaptors have been merged, erase any empty slots corresponding // to the merged adaptors that were nulled-out in the loop above. + size_t beforeErase = passes.size(); llvm::erase_if(passes, std::logical_not>()); + if (beforeErase != passes.size()) { + LDBG(3) << "Removed " << (beforeErase - passes.size()) + << " merged adaptor slots from pass list"; + } // If this is a op-agnostic pass manager, there is nothing left to do. 
std::optional rawOpName = getOpName(*ctx); - if (!rawOpName) + if (!rawOpName) { + LDBG(3) + << "Op-agnostic pass manager, skipping operation-specific verification"; return success(); + } // Otherwise, verify that all of the passes are valid for the current // operation anchor. + LDBG(3) << "Verifying " << passes.size() << " passes for operation '" + << getOpAnchorName() << "'"; + std::optional opName = rawOpName->getRegisteredInfo(); for (std::unique_ptr &pass : passes) { @@ -292,6 +315,8 @@ LogicalResult OpPassManagerImpl::finalizePassList(MLIRContext *ctx) { << "'!"; } } + + LDBG(3) << "Pass list finalization completed successfully"; return success(); } @@ -456,23 +481,45 @@ OpPassManager::Nesting OpPassManager::getNesting() { return impl->nesting; } LogicalResult OpPassManager::initialize(MLIRContext *context, unsigned newInitGeneration) { - if (impl->initializationGeneration == newInitGeneration) + + if (impl->initializationGeneration == newInitGeneration) { + LDBG(2) << "Pass manager already initialized " + << "' (generation " << newInitGeneration << ") with " << size() + << " passes"; return success(); + } + + LDBG(2) << "Initializing pass manager '" << getOpAnchorName() + << "' (generation " << newInitGeneration << ") with " << size() + << " passes"; impl->initializationGeneration = newInitGeneration; + for (Pass &pass : getPasses()) { // If this pass isn't an adaptor, directly initialize it. auto *adaptor = dyn_cast(&pass); if (!adaptor) { - if (failed(pass.initialize(context))) + LDBG(2) << "Initializing pass '" << pass.getName() << "'"; + if (failed(pass.initialize(context))) { + LDBG(2) << "Failed to initialize pass '" << pass.getName() << "'"; return failure(); + } continue; } // Otherwise, initialize each of the adaptors pass managers. 
+ LDBG(3) << "Initializing adaptor pass with " + << adaptor->getPassManagers().size() << " nested managers"; for (OpPassManager &adaptorPM : adaptor->getPassManagers()) - if (failed(adaptorPM.initialize(context, newInitGeneration))) + if (failed(adaptorPM.initialize(context, newInitGeneration))) { + LDBG(2) << "Failed to initialize nested pass manager"; return failure(); + } } + + LDBG_OS([&](raw_ostream &os) { + os << "Pass manager initialization completed successfully: "; + printAsTextualPipeline(os, /*pretty=*/false); + }); return success(); } @@ -499,16 +546,23 @@ llvm::hash_code OpPassManager::hash() { LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op, AnalysisManager am, bool verifyPasses, unsigned parentInitGeneration) { + LDBG() << "Running pass '" << pass->getName() << "' on operation '" + << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "' at " + << op->getLoc(); + std::optional opInfo = op->getRegisteredInfo(); - if (!opInfo) + if (!opInfo) { return op->emitOpError() << "trying to schedule a pass on an unregistered operation"; - if (!opInfo->hasTrait()) + } + if (!opInfo->hasTrait()) { return op->emitOpError() << "trying to schedule a pass on an operation not " "marked as 'IsolatedFromAbove'"; - if (!pass->canScheduleOn(*op->getName().getRegisteredInfo())) + } + if (!pass->canScheduleOn(*op->getName().getRegisteredInfo())) { return op->emitOpError() << "trying to schedule a pass on an unsupported operation"; + } // Initialize the pass state with a callback for the pass to dynamically // execute a pipeline on the currently visited operation. @@ -526,8 +580,10 @@ LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op, pipeline.getImpl().canScheduleOn(*op->getContext(), root->getName())); // Before running, finalize the passes held by the pipeline. 
- if (failed(pipeline.getImpl().finalizePassList(root->getContext()))) + if (failed(pipeline.getImpl().finalizePassList(root->getContext()))) { + LDBG() << "Failed to finalize pass list for pipeline"; return failure(); + } // Initialize the user provided pipeline and execute the pipeline. if (failed(pipeline.initialize(root->getContext(), parentInitGeneration))) @@ -599,6 +655,13 @@ LogicalResult OpToOpPassAdaptor::runPipeline( OpPassManager &pm, Operation *op, AnalysisManager am, bool verifyPasses, unsigned parentInitGeneration, PassInstrumentor *instrumentor, const PassInstrumentation::PipelineParentInfo *parentInfo) { + LDBG_OS([&](raw_ostream &os) { + os << "Running pipeline on operation '" + << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "' with " + << pm.size() << " passes, verifyPasses=" << verifyPasses + << " pipeline: "; + pm.printAsTextualPipeline(os, /*pretty=*/false); + }); assert((!instrumentor || parentInfo) && "expected parent info if instrumentor is provided"); auto scopeExit = llvm::make_scope_exit([&] { @@ -615,9 +678,14 @@ LogicalResult OpToOpPassAdaptor::runPipeline( *parentInfo); } - for (Pass &pass : pm.getPasses()) - if (failed(run(&pass, op, am, verifyPasses, parentInitGeneration))) + for (Pass &pass : pm.getPasses()) { + if (failed(run(&pass, op, am, verifyPasses, parentInitGeneration))) { + LDBG() << "Pipeline failed for pass '" << pass.getName() + << "' on operation '" + << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "'"; return failure(); + } + } if (instrumentor) { instrumentor->runAfterPipeline(pm.getOpName(*op->getContext()), @@ -630,9 +698,19 @@ LogicalResult OpToOpPassAdaptor::runPipeline( /// does not exist. 
static OpPassManager * findPassManagerWithAnchor(MutableArrayRef mgrs, StringRef name) { + LDBG(3) << "Looking for pass manager with anchor name '" << name << "' among " + << mgrs.size() << " managers"; + auto *it = llvm::find_if( mgrs, [&](OpPassManager &mgr) { return mgr.getOpAnchorName() == name; }); - return it == mgrs.end() ? nullptr : &*it; + + if (it == mgrs.end()) { + LDBG(2) << "No pass manager found with anchor name '" << name << "'"; + return nullptr; + } + + LDBG(2) << "Found pass manager with anchor name '" << name << "'"; + return &*it; } /// Find an operation pass manager that can operate on an operation of the given @@ -640,10 +718,22 @@ findPassManagerWithAnchor(MutableArrayRef mgrs, StringRef name) { static OpPassManager *findPassManagerFor(MutableArrayRef mgrs, OperationName name, MLIRContext &context) { + LDBG(4) << "Looking for pass manager that can handle operation '" << name + << "' among " << mgrs.size() << " managers"; + auto *it = llvm::find_if(mgrs, [&](OpPassManager &mgr) { return mgr.getImpl().canScheduleOn(context, name); }); - return it == mgrs.end() ? nullptr : &*it; + + if (it == mgrs.end()) { + LDBG(4) << "No pass manager found that can handle operation '" << name + << "'"; + return nullptr; + } + + LDBG(4) << "Found pass manager '" << it->getOpAnchorName() + << "' that can handle operation '" << name << "'"; + return &*it; } OpToOpPassAdaptor::OpToOpPassAdaptor(OpPassManager &&mgr) { @@ -657,6 +747,9 @@ void OpToOpPassAdaptor::getDependentDialects(DialectRegistry &dialects) const { LogicalResult OpToOpPassAdaptor::tryMergeInto(MLIRContext *ctx, OpToOpPassAdaptor &rhs) { + LDBG(3) << "Attempting to merge pass adaptor with " << mgrs.size() + << " managers into rhs with " << rhs.mgrs.size() << " managers"; + // Functor used to check if a pass manager is generic, i.e. op-agnostic. 
auto isGenericPM = [&](OpPassManager &pm) { return !pm.getOpName(); }; @@ -682,14 +775,24 @@ LogicalResult OpToOpPassAdaptor::tryMergeInto(MLIRContext *ctx, // // Check the current adaptor. auto *lhsGenericPMIt = llvm::find_if(mgrs, isGenericPM); - if (lhsGenericPMIt != mgrs.end() && - hasScheduleConflictWith(*lhsGenericPMIt, rhs.mgrs)) - return failure(); + if (lhsGenericPMIt != mgrs.end()) { + LDBG(4) << "Found generic pass manager on LHS, checking for conflicts"; + if (hasScheduleConflictWith(*lhsGenericPMIt, rhs.mgrs)) { + LDBG(4) + << "Merge failed: LHS generic pass manager has conflicts with RHS"; + return failure(); + } + } // Check the rhs adaptor. auto *rhsGenericPMIt = llvm::find_if(rhs.mgrs, isGenericPM); - if (rhsGenericPMIt != rhs.mgrs.end() && - hasScheduleConflictWith(*rhsGenericPMIt, mgrs)) - return failure(); + if (rhsGenericPMIt != rhs.mgrs.end()) { + LDBG(4) << "Found generic pass manager on RHS, checking for conflicts"; + if (hasScheduleConflictWith(*rhsGenericPMIt, mgrs)) { + LDBG(4) + << "Merge failed: RHS generic pass manager has conflicts with LHS"; + return failure(); + } + } for (auto &pm : mgrs) { // If an existing pass manager exists, then merge the given pass manager @@ -744,25 +847,51 @@ void OpToOpPassAdaptor::runOnOperation(bool verifyPasses) { /// Run this pass adaptor synchronously. 
void OpToOpPassAdaptor::runOnOperationImpl(bool verifyPasses) { + LDBG_OS([&](raw_ostream &os) { + os << "Running pass adaptor synchronously on operation '" + << OpWithFlags(getOperation(), OpPrintingFlags().skipRegions()) + << "' with " << mgrs.size() + << " pass managers, verifyPasses=" << verifyPasses << " pipeline: "; + printAsTextualPipeline(os, /*pretty=*/false); + }); + auto am = getAnalysisManager(); PassInstrumentation::PipelineParentInfo parentInfo = {llvm::get_threadid(), this}; auto *instrumentor = am.getPassInstrumentor(); + + unsigned processedOps = 0; for (auto ®ion : getOperation()->getRegions()) { for (auto &block : region) { for (auto &op : block) { auto *mgr = findPassManagerFor(mgrs, op.getName(), *op.getContext()); - if (!mgr) + if (!mgr) { + LDBG(2) << "Skipping operation '" + << OpWithFlags(&op, OpPrintingFlags().skipRegions()) + << "': no suitable pass manager found"; continue; + } // Run the held pipeline over the current operation. + LDBG(2) << "Processing operation '" + << OpWithFlags(&op, OpPrintingFlags().skipRegions()) + << "' with pass manager '" << mgr->getOpAnchorName() << "'"; + unsigned initGeneration = mgr->impl->initializationGeneration; if (failed(runPipeline(*mgr, &op, am.nest(&op), verifyPasses, - initGeneration, instrumentor, &parentInfo))) + initGeneration, instrumentor, &parentInfo))) { + LDBG(2) << "Pipeline failed for operation '" + << OpWithFlags(&op, OpPrintingFlags().skipRegions()) << "'"; signalPassFailure(); + } else { + processedOps++; + } } } } + + LDBG() << "Completed synchronous pass adaptor run, processed " << processedOps + << " operations"; } /// Utility functor that checks if the two ranges of pass managers have a size @@ -776,13 +905,24 @@ static bool hasSizeMismatch(ArrayRef lhs, /// Run this pass adaptor synchronously. 
void OpToOpPassAdaptor::runOnOperationAsyncImpl(bool verifyPasses) { + LDBG_OS([&](raw_ostream &os) { + os << "Running pass adaptor asynchronously on operation '" + << OpWithFlags(getOperation(), OpPrintingFlags().skipRegions()) + << "' with " << mgrs.size() + << " pass managers, verifyPasses=" << verifyPasses << " pipeline: "; + printAsTextualPipeline(os, /*pretty=*/false); + }); + AnalysisManager am = getAnalysisManager(); MLIRContext *context = &getContext(); // Create the async executors if they haven't been created, or if the main // pipeline has changed. - if (asyncExecutors.empty() || hasSizeMismatch(asyncExecutors.front(), mgrs)) + if (asyncExecutors.empty() || hasSizeMismatch(asyncExecutors.front(), mgrs)) { + LDBG(2) << "Creating " << context->getThreadPool().getMaxConcurrency() + << " async executors"; asyncExecutors.assign(context->getThreadPool().getMaxConcurrency(), mgrs); + } // This struct represents the information for a single operation to be // scheduled on a pass manager. @@ -803,21 +943,36 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl(bool verifyPasses) { // operation, as well as providing a queue of operations to execute over. std::vector opInfos; DenseMap> knownOpPMIdx; + + LDBG(2) << "Collecting operations for async execution"; for (auto ®ion : getOperation()->getRegions()) { for (Operation &op : region.getOps()) { // Get the pass manager index for this operation type. auto pmIdxIt = knownOpPMIdx.try_emplace(op.getName(), std::nullopt); if (pmIdxIt.second) { - if (auto *mgr = findPassManagerFor(mgrs, op.getName(), *context)) + if (auto *mgr = findPassManagerFor(mgrs, op.getName(), *context)) { pmIdxIt.first->second = std::distance(mgrs.begin(), mgr); + LDBG(2) << "Operation '" + << OpWithFlags(&op, OpPrintingFlags().skipRegions()) + << "' will use pass manager '" << mgr->getOpAnchorName() + << "'"; + } } // If this operation can be scheduled, add it to the list. 
- if (pmIdxIt.first->second) + if (pmIdxIt.first->second) { opInfos.emplace_back(*pmIdxIt.first->second, &op, am.nest(&op)); + } else { + LDBG(2) << "Operation '" + << OpWithFlags(&op, OpPrintingFlags().skipRegions()) + << "' skipped: no suitable pass manager"; + } } } + LDBG(2) << "Collected " << opInfos.size() + << " operations for async execution"; + // Get the current thread for this adaptor. PassInstrumentation::PipelineParentInfo parentInfo = {llvm::get_threadid(), this}; @@ -872,23 +1027,36 @@ void PassManager::enableVerifier(bool enabled) { verifyPasses = enabled; } /// Run the passes within this manager on the provided operation. LogicalResult PassManager::run(Operation *op) { + LDBG_OS([&](raw_ostream &os) { + os << "Starting PassManager run on operation '" + << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "' with " + << size() << " passes, verifyPasses=" << verifyPasses << " pipeline: "; + printAsTextualPipeline(os, /*pretty=*/false); + }); + MLIRContext *context = getContext(); std::optional anchorOp = getOpName(*context); - if (anchorOp && anchorOp != op->getName()) + if (anchorOp && anchorOp != op->getName()) { return emitError(op->getLoc()) << "can't run '" << getOpAnchorName() << "' pass manager on '" << op->getName() << "' op"; + } // Register all dialects for the current pipeline. + LDBG(2) << "Registering dependent dialects for pipeline"; DialectRegistry dependentDialects; getDependentDialects(dependentDialects); context->appendDialectRegistry(dependentDialects); - for (StringRef name : dependentDialects.getDialectNames()) + for (StringRef name : dependentDialects.getDialectNames()) { + LDBG(2) << "Loading dialect: " << name; context->getOrLoadDialect(name); + } // Before running, make sure to finalize the pipeline pass list. 
- if (failed(getImpl().finalizePassList(context))) + if (failed(getImpl().finalizePassList(context))) { + LDBG(2) << "Pass list finalization failed"; return failure(); + } // Notify the context that we start running a pipeline for bookkeeping. context->enterMultiThreadedExecution(); @@ -898,17 +1066,27 @@ LogicalResult PassManager::run(Operation *op) { llvm::hash_code pipelineKey = hash(); if (newInitKey != initializationKey || pipelineKey != pipelineInitializationKey) { - if (failed(initialize(context, impl->initializationGeneration + 1))) + LDBG(2) << "Initializing passes with new generation: " + << (impl->initializationGeneration + 1); + if (failed(initialize(context, impl->initializationGeneration + 1))) { + LDBG(2) << "Pass initialization failed"; return failure(); + } initializationKey = newInitKey; pipelineInitializationKey = pipelineKey; + } else { + LDBG(2) << "Using existing pass initialization (generation: " + << impl->initializationGeneration << ")"; } // Construct a top level analysis manager for the pipeline. + LDBG(2) << "Constructing analysis manager for pipeline execution"; ModuleAnalysisManager am(op, instrumentor.get()); // If reproducer generation is enabled, run the pass manager with crash // handling enabled. + LDBG(2) << "Executing pipeline with " + << (crashReproGenerator ? "crash recovery" : "normal execution"); LogicalResult result = crashReproGenerator ? runWithCrashRecovery(op, am) : runPasses(op, am); @@ -916,8 +1094,13 @@ LogicalResult PassManager::run(Operation *op) { context->exitMultiThreadedExecution(); // Dump all of the pass statistics if necessary. - if (passStatisticsMode) + if (passStatisticsMode) { + LDBG(2) << "Dumping pass statistics"; dumpStatistics(); + } + + LDBG(2) << "PassManager run completed with result: " + << (succeeded(result) ? 
"success" : "failure"); return result; } @@ -930,6 +1113,7 @@ void PassManager::addInstrumentation(std::unique_ptr pi) { } LogicalResult PassManager::runPasses(Operation *op, AnalysisManager am) { + LDBG(2) << "Executing passes using OpToOpPassAdaptor pipeline"; return OpToOpPassAdaptor::runPipeline(*this, op, am, verifyPasses, impl->initializationGeneration); } From 04d38bed70698d8591b3ac7b6b13635b1e894c5a Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Fri, 12 Sep 2025 18:35:58 +0200 Subject: [PATCH 153/734] [clang] Regenerate test checks including TBAA semantics (NFC) Tests exercizing TBAA metadata (both purposefully and not), and previously generated via UTC, have been regenerated and updated to version 6. --- clang/test/C/C11/n1285_1.c | 42 +- clang/test/C/C2y/n3254.c | 2 +- clang/test/CodeGen/AArch64/fp8-init-list.c | 10 +- clang/test/CodeGen/AArch64/ls64-inline-asm.c | 52 +- .../CodeGen/LoongArch/lasx/builtin-alias.c | 7658 +++++++------ .../lasx/builtin-approximate-alias.c | 43 +- .../LoongArch/lasx/builtin-approximate.c | 43 +- clang/test/CodeGen/LoongArch/lasx/builtin.c | 7655 +++++++------ .../PowerPC/builtins-ppc-build-pair-mma.c | 87 +- clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c | 193 +- .../CodeGen/PowerPC/builtins-ppc-pair-mma.c | 919 +- .../attr-rvv-vector-bits-bitcast-less-8.c | 47 +- .../RISCV/attr-rvv-vector-bits-bitcast.c | 195 +- .../CodeGen/RISCV/attr-rvv-vector-bits-cast.c | 109 +- .../RISCV/attr-rvv-vector-bits-globals.c | 123 +- .../CodeGen/SystemZ/builtins-systemz-i128.c | 124 +- .../SystemZ/gnu-atomic-builtins-i128-16Al.c | 186 +- .../SystemZ/gnu-atomic-builtins-i128-8Al.c | 186 +- .../CodeGen/SystemZ/sync-builtins-i128-16Al.c | 168 +- clang/test/CodeGen/SystemZ/zvector2.c | 116 +- clang/test/CodeGen/allow-ubsan-check.c | 26 +- .../attr-arm-sve-vector-bits-bitcast.c | 255 +- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 88 +- .../attr-arm-sve-vector-bits-globals.c | 135 +- .../CodeGen/attr-counted-by-for-pointers.c | 240 +- 
clang/test/CodeGen/attr-counted-by-pr110385.c | 30 +- clang/test/CodeGen/attr-counted-by.c | 1065 +- clang/test/CodeGen/builtin-maxnum-minnum.c | 92 +- clang/test/CodeGen/cleanup-destslot-simple.c | 174 +- clang/test/CodeGen/isfpclass.c | 101 +- .../math-libcalls-tbaa-indirect-args.c | 84 +- clang/test/CodeGen/math-libcalls-tbaa.c | 241 +- .../CodeGen/sanitize-metadata-nosanitize.c | 52 +- .../attr-likelihood-if-branch-weights.cpp | 439 +- .../attr-likelihood-iteration-stmt.cpp | 281 +- .../attr-likelihood-switch-branch-weights.cpp | 417 +- clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp | 34 +- .../CodeGenCXX/inline-then-fold-variadics.cpp | 111 +- .../CodeGenCXX/load-reference-metadata.cpp | 52 +- .../CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl | 65 +- .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 178 +- clang/test/CodeGenOpenCL/amdgpu-printf.cl | 42 +- .../builtins-amdgcn-gfx12-wmma-w32.cl | 106 +- .../builtins-amdgcn-gfx12-wmma-w64.cl | 106 +- ...ins-amdgcn-gfx1250-async-load-store-lds.cl | 108 +- .../builtins-amdgcn-swmmac-w32.cl | 106 +- .../builtins-amdgcn-swmmac-w64.cl | 106 +- .../CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl | 79 +- .../CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl | 79 +- ...plicit-addrspacecast-function-parameter.cl | 8 +- clang/test/CodeGenOpenCL/preserve_vec3.cl | 46 +- .../array-type-infinite-loop.clcpp | 12 +- .../Generic/unsigned-promotion-debuginfo.c | 22 +- clang/test/Headers/__clang_hip_math.hip | 9591 +++++++++-------- clang/test/Headers/wasm.c | 2395 ++-- clang/test/OpenMP/bug54082.c | 42 +- clang/test/OpenMP/bug56913.c | 34 +- clang/test/OpenMP/bug57757.cpp | 44 +- ...arallel_reduction_codegen_tbaa_PR46146.cpp | 997 +- .../OpenMP/parallel_if_codegen_PR51349.cpp | 75 +- .../taskloop_strictmodifier_codegen.cpp | 275 +- 61 files changed, 20281 insertions(+), 16110 deletions(-) diff --git a/clang/test/C/C11/n1285_1.c b/clang/test/C/C11/n1285_1.c index 25b68e3145b04..345ec94a1eeef 100644 --- a/clang/test/C/C11/n1285_1.c +++ 
b/clang/test/C/C11/n1285_1.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple=x86_64 -std=c99 -Wno-dangling -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK // RUN: %clang_cc1 -triple=x86_64 -std=c11 -Wno-dangling -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK // RUN: %clang_cc1 -triple=x86_64 -std=c11 -O2 -disable-llvm-passes -Wno-dangling -emit-llvm -o - %s | FileCheck %s --check-prefix=C11-O2 @@ -32,9 +32,9 @@ struct X f(void); // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr [[P]], align 8, !tbaa [[TBAA2:![0-9]+]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr [[P]], align 8, !tbaa [[INTPTR_TBAA2:![0-9]+]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7:![0-9]+]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[P]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] // @@ -91,18 +91,18 @@ int func_return(void) { // C11-O2: [[COND_END]]: // C11-O2-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A1]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] // C11-O2-NEXT: call void 
@llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] // C11-O2-NEXT: call void @llvm.lifetime.start.p0(ptr [[Q]]) #[[ATTR5]] // C11-O2-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTCOMPOUNDLITERAL]], i8 0, i64 20, i1 false) // C11-O2-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[DOTCOMPOUNDLITERAL]], i32 0, i32 0 // C11-O2-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[DOTCOMPOUNDLITERAL]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [5 x i32], ptr [[A3]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[Q]], align 8, !tbaa [[TBAA2]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]] -// C11-O2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Q]], align 8, !tbaa [[TBAA2]] -// C11-O2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA7]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]] +// C11-O2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Q]], align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA7]] // C11-O2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP3]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[Q]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[ADD]] @@ -138,10 +138,10 @@ int ternary(void) { // C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9:![0-9]+]] // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]] +// 
C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] // @@ -175,10 +175,10 @@ int comma(void) { // C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9]] // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 -// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] // @@ -217,10 +217,10 @@ int cast(void) { // C11-O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 4 [[X]], i64 20, i1 false), !tbaa.struct [[TBAA_STRUCT9]] // C11-O2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_X]], ptr [[REF_TMP]], i32 0, i32 0 // C11-O2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[A]], i64 0, i64 0 
-// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[TBAA2]] +// C11-O2-NEXT: store ptr [[ARRAYDECAY]], ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR5]] -// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[TBAA2]] -// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7]] +// C11-O2-NEXT: [[TMP0:%.*]] = load ptr, ptr @p, align 8, !tbaa [[INTPTR_TBAA2]] +// C11-O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA7]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[S]]) #[[ATTR5]] // C11-O2-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR5]] // C11-O2-NEXT: ret i32 [[TMP1]] @@ -232,12 +232,12 @@ int assign(void) { return *p; } //. -// C11-O2: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// C11-O2: [[INTPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // C11-O2: [[META3]] = !{!"p1 int", [[META4:![0-9]+]], i64 0} // C11-O2: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0} // C11-O2: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // C11-O2: [[META6]] = !{!"Simple C/C++ TBAA"} -// C11-O2: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// C11-O2: [[INT_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} // C11-O2: [[META8]] = !{!"int", [[META5]], i64 0} // C11-O2: [[TBAA_STRUCT9]] = !{i64 0, i64 20, [[META10:![0-9]+]]} // C11-O2: [[META10]] = !{[[META5]], [[META5]], i64 0} diff --git a/clang/test/C/C2y/n3254.c b/clang/test/C/C2y/n3254.c index e114735a9cb79..9f8c47756df32 100644 --- a/clang/test/C/C2y/n3254.c +++ b/clang/test/C/C2y/n3254.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple=x86_64 -std=c2y %s -emit-llvm -o - | FileCheck %s /* WG14 N3254: Yes diff --git 
a/clang/test/CodeGen/AArch64/fp8-init-list.c b/clang/test/CodeGen/AArch64/fp8-init-list.c index 8b4b31a71c46a..7c0f6278b2090 100644 --- a/clang/test/CodeGen/AArch64/fp8-init-list.c +++ b/clang/test/CodeGen/AArch64/fp8-init-list.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX @@ -34,25 +34,25 @@ struct S s; // CHECK-LABEL: define dso_local void @f( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA2:![0-9]+]] // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z1fu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[__MFP8_TBAA2:![0-9]+]] // CHECK-CXX-NEXT: ret void // void f(__mfp8 x) { s = (struct S){x}; } //. -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[__MFP8_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK: [[META3]] = !{!"__mfp8", [[META4:![0-9]+]], i64 0} // CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} //. 
-// CHECK-CXX: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-CXX: [[__MFP8_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK-CXX: [[META3]] = !{!"__mfp8", [[META4:![0-9]+]], i64 0} // CHECK-CXX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-CXX: [[META5]] = !{!"Simple C++ TBAA"} diff --git a/clang/test/CodeGen/AArch64/ls64-inline-asm.c b/clang/test/CodeGen/AArch64/ls64-inline-asm.c index 8aa0684dba14d..1d217eb8801e5 100644 --- a/clang/test/CodeGen/AArch64/ls64-inline-asm.c +++ b/clang/test/CodeGen/AArch64/ls64-inline-asm.c @@ -1,12 +1,13 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple aarch64 -target-feature +ls64 -O1 -emit-llvm -x c %s -o - | FileCheck %s struct foo { unsigned long long x[8]; }; -// CHECK-LABEL: @load( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR:%.*]]) #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] -// CHECK-NEXT: store i512 [[TMP0]], ptr [[OUTPUT:%.*]], align 8 +// CHECK-LABEL: define dso_local void @load( +// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 64)) [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR]]) #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] +// CHECK-NEXT: store i512 [[TMP0]], ptr [[OUTPUT]], align 8 // CHECK-NEXT: ret void // void load(struct foo *output, void *addr) @@ -14,10 +15,11 @@ void load(struct foo *output, void *addr) __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory"); } -// CHECK-LABEL: @store( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i512, ptr [[INPUT:%.*]], align 8 -// CHECK-NEXT: tail call void asm 
sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc [[META3:![0-9]+]] +// CHECK-LABEL: define dso_local void @store( +// CHECK-SAME: ptr noundef readonly captures(none) [[INPUT:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i512, ptr [[INPUT]], align 8 +// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR]]) #[[ATTR1]], !srcloc [[META3:![0-9]+]] // CHECK-NEXT: ret void // void store(const struct foo *input, void *addr) @@ -25,30 +27,31 @@ void store(const struct foo *input, void *addr) __asm__ volatile ("st64b %0,[%1]" : : "r" (*input), "r" (addr) : "memory" ); } -// CHECK-LABEL: @store2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[IN:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]] +// CHECK-LABEL: define dso_local void @store2( +// CHECK-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[IN]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 16 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CONV5:%.*]] = sext i32 [[TMP2]] to i64 // CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 64 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[ARRAYIDX7]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CONV8:%.*]] = sext i32 [[TMP3]] to i64 // CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 100 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CONV11:%.*]] = sext i32 [[TMP4]] to i64 // CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 144 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CONV14:%.*]] = sext i32 [[TMP5]] to i64 // CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 196 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CONV17:%.*]] = sext i32 [[TMP6]] to i64 // CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 256 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[CONV20:%.*]] = sext i32 [[TMP7]] to i64 // CHECK-NEXT: [[S_SROA_10_0_INSERT_EXT:%.*]] = zext i64 [[CONV20]] to i512 // CHECK-NEXT: [[S_SROA_10_0_INSERT_SHIFT:%.*]] = shl nuw i512 [[S_SROA_10_0_INSERT_EXT]], 448 @@ -72,7 +75,7 @@ void store(const struct foo *input, void *addr) // CHECK-NEXT: [[S_SROA_0_0_INSERT_EXT:%.*]] = zext i64 [[CONV]] to i512 // CHECK-NEXT: [[S_SROA_0_0_INSERT_MASK:%.*]] = or disjoint i512 [[S_SROA_4_0_INSERT_MASK]], [[S_SROA_4_0_INSERT_SHIFT]] // CHECK-NEXT: [[S_SROA_0_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_0_0_INSERT_MASK]], 
[[S_SROA_0_0_INSERT_EXT]] -// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc [[META8:![0-9]+]] +// CHECK-NEXT: tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr [[ADDR]]) #[[ATTR1]], !srcloc [[META8:![0-9]+]] // CHECK-NEXT: ret void // void store2(int *in, void *addr) @@ -80,3 +83,12 @@ void store2(int *in, void *addr) struct foo s = { in[0], in[1], in[4], in[16], in[25], in[36], in[49], in[64] }; __asm__ volatile ("st64b %0,[%1]" : : "r" (s), "r" (addr) : "memory" ); } +//. +// CHECK: [[META2]] = !{i64 789} +// CHECK: [[META3]] = !{i64 1368} +// CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META8]] = !{i64 5992} +//. diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c index 9a8ce224bcfd0..dd094e5493a60 100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c @@ -1,6386 +1,7130 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s #include -// CHECK-LABEL: @xvsll_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } -// CHECK-LABEL: @xvsll_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } 
-// CHECK-LABEL: @xvsll_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } -// CHECK-LABEL: @xvsll_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } -// CHECK-LABEL: @xvslli_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } -// CHECK-LABEL: @xvslli_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: 
store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } -// CHECK-LABEL: @xvslli_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } -// CHECK-LABEL: @xvslli_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } -// CHECK-LABEL: @xvsra_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsra_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } -// CHECK-LABEL: @xvsra_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsra_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } -// CHECK-LABEL: @xvsra_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsra_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } -// CHECK-LABEL: @xvsra_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvsra_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } -// CHECK-LABEL: @xvsrai_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrai_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } -// CHECK-LABEL: @xvsrai_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvsrai_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } -// CHECK-LABEL: @xvsrai_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrai_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } -// CHECK-LABEL: @xvsrai_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrai_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } -// CHECK-LABEL: @xvsrar_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } -// CHECK-LABEL: @xvsrar_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } -// CHECK-LABEL: @xvsrar_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) 
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } -// CHECK-LABEL: @xvsrar_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } -// CHECK-LABEL: @xvsrari_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } -// CHECK-LABEL: @xvsrari_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } -// CHECK-LABEL: @xvsrari_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> 
[[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } -// CHECK-LABEL: @xvsrari_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } -// CHECK-LABEL: @xvsrl_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } -// CHECK-LABEL: @xvsrl_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } -// CHECK-LABEL: @xvsrl_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 
32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } -// CHECK-LABEL: @xvsrl_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return 
__lasx_xvsrl_d(_1, _2); } -// CHECK-LABEL: @xvsrli_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } -// CHECK-LABEL: @xvsrli_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } -// CHECK-LABEL: @xvsrli_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } -// CHECK-LABEL: @xvsrli_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } -// CHECK-LABEL: @xvsrlr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvsrlr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } -// CHECK-LABEL: @xvsrlr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } -// CHECK-LABEL: @xvsrlr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } -// CHECK-LABEL: @xvsrlr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } -// CHECK-LABEL: @xvsrlri_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } -// CHECK-LABEL: @xvsrlri_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } -// CHECK-LABEL: @xvsrlri_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } -// CHECK-LABEL: @xvsrlri_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 
x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } -// CHECK-LABEL: @xvbitclr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } -// CHECK-LABEL: @xvbitclr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } -// CHECK-LABEL: @xvbitclr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, 
_2); } -// CHECK-LABEL: @xvbitclr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } -// CHECK-LABEL: @xvbitclri_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } -// CHECK-LABEL: @xvbitclri_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } -// CHECK-LABEL: @xvbitclri_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 
xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } -// CHECK-LABEL: @xvbitclri_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } -// CHECK-LABEL: @xvbitset_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitset_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } -// CHECK-LABEL: @xvbitset_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitset_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } -// CHECK-LABEL: @xvbitset_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitset_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } -// CHECK-LABEL: @xvbitset_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitset_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } -// CHECK-LABEL: @xvbitseti_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } -// CHECK-LABEL: @xvbitseti_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } -// CHECK-LABEL: @xvbitseti_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_w( +// CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } -// CHECK-LABEL: @xvbitseti_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } -// CHECK-LABEL: @xvbitrev_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } -// CHECK-LABEL: @xvbitrev_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] 
// CHECK-NEXT: ret void // v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } -// CHECK-LABEL: @xvbitrev_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } -// CHECK-LABEL: @xvbitrev_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } -// CHECK-LABEL: @xvbitrevi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrevi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } -// CHECK-LABEL: @xvbitrevi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrevi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } -// CHECK-LABEL: @xvbitrevi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrevi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } -// CHECK-LABEL: @xvbitrevi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrevi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } -// CHECK-LABEL: @xvadd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } -// CHECK-LABEL: @xvadd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } -// CHECK-LABEL: @xvadd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } -// CHECK-LABEL: @xvadd_d( -// CHECK-NEXT: entry: 
-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } -// CHECK-LABEL: @xvaddi_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret 
void // v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } -// CHECK-LABEL: @xvaddi_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } -// CHECK-LABEL: @xvaddi_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } -// CHECK-LABEL: @xvaddi_du( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } -// CHECK-LABEL: @xvsub_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret 
void // v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } -// CHECK-LABEL: @xvsub_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } -// CHECK-LABEL: @xvsub_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } -// CHECK-LABEL: @xvsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } -// CHECK-LABEL: @xvsubi_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } -// CHECK-LABEL: @xvsubi_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } -// CHECK-LABEL: @xvsubi_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } -// CHECK-LABEL: @xvsubi_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } -// CHECK-LABEL: @xvmax_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } -// CHECK-LABEL: @xvmax_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } -// CHECK-LABEL: @xvmax_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } -// CHECK-LABEL: @xvmax_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } -// CHECK-LABEL: @xvmaxi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } -// CHECK-LABEL: @xvmaxi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } -// CHECK-LABEL: @xvmaxi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } -// CHECK-LABEL: @xvmaxi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } -// CHECK-LABEL: @xvmax_bu( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } -// CHECK-LABEL: @xvmax_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } -// CHECK-LABEL: @xvmax_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } -// CHECK-LABEL: @xvmax_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } -// CHECK-LABEL: @xvmaxi_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } -// CHECK-LABEL: @xvmaxi_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } -// CHECK-LABEL: @xvmaxi_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } -// CHECK-LABEL: @xvmaxi_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } -// CHECK-LABEL: @xvmin_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } -// CHECK-LABEL: @xvmin_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: 
define dso_local void @xvmin_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } -// CHECK-LABEL: @xvmin_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x 
i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } -// CHECK-LABEL: @xvmin_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } -// CHECK-LABEL: @xvmini_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], 
i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } -// CHECK-LABEL: @xvmini_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } -// CHECK-LABEL: @xvmini_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } -// CHECK-LABEL: @xvmini_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } -// CHECK-LABEL: @xvmin_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) 
-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } -// CHECK-LABEL: @xvmin_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } -// CHECK-LABEL: @xvmin_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } -// CHECK-LABEL: @xvmin_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } -// CHECK-LABEL: @xvmini_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } -// CHECK-LABEL: @xvmini_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } -// CHECK-LABEL: @xvmini_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_wu( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } -// CHECK-LABEL: @xvmini_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } -// CHECK-LABEL: @xvseq_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } -// CHECK-LABEL: @xvseq_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // 
v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } -// CHECK-LABEL: @xvseq_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } -// CHECK-LABEL: @xvseq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } -// CHECK-LABEL: @xvseqi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } -// CHECK-LABEL: @xvseqi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } -// CHECK-LABEL: @xvseqi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } -// CHECK-LABEL: @xvseqi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } -// CHECK-LABEL: @xvslt_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } -// CHECK-LABEL: @xvslt_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } -// CHECK-LABEL: @xvslt_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } -// CHECK-LABEL: @xvslt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x 
i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } -// CHECK-LABEL: @xvslti_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } -// CHECK-LABEL: @xvslti_h( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } -// CHECK-LABEL: @xvslti_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } -// CHECK-LABEL: @xvslti_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_d( 
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } -// CHECK-LABEL: @xvslt_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } -// CHECK-LABEL: @xvslt_hu( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } -// CHECK-LABEL: @xvslt_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } -// CHECK-LABEL: @xvslt_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } -// CHECK-LABEL: @xvslti_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } -// CHECK-LABEL: @xvslti_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } -// CHECK-LABEL: @xvslti_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } -// CHECK-LABEL: @xvslti_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } -// CHECK-LABEL: @xvsle_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } -// CHECK-LABEL: @xvsle_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } -// CHECK-LABEL: @xvsle_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvsle_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } -// CHECK-LABEL: @xvsle_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } -// CHECK-LABEL: @xvslei_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } -// CHECK-LABEL: @xvslei_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } -// CHECK-LABEL: 
@xvslei_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } -// CHECK-LABEL: @xvslei_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } -// CHECK-LABEL: @xvsle_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: 
[[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } -// CHECK-LABEL: @xvsle_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store 
<16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } -// CHECK-LABEL: @xvsle_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } -// CHECK-LABEL: @xvsle_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } -// CHECK-LABEL: @xvslei_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } -// CHECK-LABEL: @xvslei_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } -// CHECK-LABEL: @xvslei_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } -// CHECK-LABEL: @xvslei_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } -// CHECK-LABEL: @xvsat_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } -// CHECK-LABEL: @xvsat_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) 
-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } -// CHECK-LABEL: @xvsat_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } -// CHECK-LABEL: @xvsat_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } -// CHECK-LABEL: @xvsat_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } -// CHECK-LABEL: @xvsat_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } -// 
CHECK-LABEL: @xvsat_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } -// CHECK-LABEL: @xvsat_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } -// CHECK-LABEL: @xvadda_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// 
CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } -// CHECK-LABEL: @xvadda_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } -// CHECK-LABEL: @xvadda_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } -// CHECK-LABEL: @xvadda_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } -// CHECK-LABEL: @xvsadd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } -// CHECK-LABEL: @xvsadd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } -// CHECK-LABEL: @xvsadd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 
x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } -// CHECK-LABEL: @xvsadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } -// CHECK-LABEL: @xvsadd_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } -// CHECK-LABEL: @xvsadd_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } -// CHECK-LABEL: @xvsadd_wu( -// CHECK-NEXT: 
entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } -// CHECK-LABEL: @xvsadd_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } -// CHECK-LABEL: @xvavg_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } -// CHECK-LABEL: @xvavg_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } -// CHECK-LABEL: @xvavg_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } -// CHECK-LABEL: 
@xvavg_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } -// CHECK-LABEL: @xvavg_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } -// CHECK-LABEL: @xvavg_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } -// CHECK-LABEL: @xvavg_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } -// CHECK-LABEL: @xvavg_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return 
__lasx_xvavg_du(_1, _2); } -// CHECK-LABEL: @xvavgr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } -// CHECK-LABEL: @xvavgr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } -// CHECK-LABEL: @xvavgr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } -// CHECK-LABEL: @xvavgr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } -// CHECK-LABEL: @xvavgr_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 
xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } -// CHECK-LABEL: @xvavgr_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } -// CHECK-LABEL: @xvavgr_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } -// CHECK-LABEL: @xvavgr_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } -// CHECK-LABEL: @xvssub_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_b( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } -// CHECK-LABEL: @xvssub_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } -// CHECK-LABEL: @xvssub_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } -// CHECK-LABEL: @xvssub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = 
load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } -// CHECK-LABEL: @xvssub_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } -// CHECK-LABEL: @xvssub_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvssub_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } -// CHECK-LABEL: @xvssub_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } -// CHECK-LABEL: @xvssub_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } -// CHECK-LABEL: @xvabsd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } -// CHECK-LABEL: @xvabsd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } -// CHECK-LABEL: @xvabsd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 
x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } -// CHECK-LABEL: @xvabsd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } -// CHECK-LABEL: @xvabsd_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } -// CHECK-LABEL: @xvabsd_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } -// CHECK-LABEL: @xvabsd_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } -// CHECK-LABEL: @xvabsd_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } -// CHECK-LABEL: @xvmul_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// 
CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } -// CHECK-LABEL: @xvmul_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } -// CHECK-LABEL: @xvmul_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } -// CHECK-LABEL: @xvmul_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } -// CHECK-LABEL: @xvmadd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } -// CHECK-LABEL: @xvmadd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } -// CHECK-LABEL: @xvmadd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } -// CHECK-LABEL: @xvmadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], 
<8 x i32> [[_247]], <8 x i32> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } -// CHECK-LABEL: @xvdiv_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } -// CHECK-LABEL: @xvdiv_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } -// CHECK-LABEL: @xvdiv_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } -// CHECK-LABEL: @xvdiv_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } -// CHECK-LABEL: @xvdiv_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } -// CHECK-LABEL: @xvdiv_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// 
CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } -// CHECK-LABEL: @xvdiv_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// 
CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } -// CHECK-LABEL: @xvdiv_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } -// CHECK-LABEL: @xvhaddw_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } -// CHECK-LABEL: @xvhaddw_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } -// CHECK-LABEL: @xvhaddw_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } -// CHECK-LABEL: @xvhaddw_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } -// CHECK-LABEL: @xvhaddw_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } -// CHECK-LABEL: @xvhaddw_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } -// CHECK-LABEL: @xvhsubw_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret 
void // v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } -// CHECK-LABEL: @xvhsubw_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } -// CHECK-LABEL: @xvhsubw_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } -// CHECK-LABEL: @xvhsubw_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } -// CHECK-LABEL: @xvhsubw_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } -// CHECK-LABEL: @xvhsubw_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } -// CHECK-LABEL: @xvmod_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } -// CHECK-LABEL: @xvmod_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } -// CHECK-LABEL: @xvmod_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } -// CHECK-LABEL: @xvmod_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } -// CHECK-LABEL: @xvmod_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: 
store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } -// CHECK-LABEL: @xvmod_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } -// CHECK-LABEL: @xvmod_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } -// CHECK-LABEL: @xvmod_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } -// CHECK-LABEL: @xvrepl128vei_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrepl128vei_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } -// CHECK-LABEL: @xvrepl128vei_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrepl128vei_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } -// CHECK-LABEL: @xvrepl128vei_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local 
void @xvrepl128vei_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } -// CHECK-LABEL: @xvrepl128vei_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrepl128vei_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } -// CHECK-LABEL: @xvpickev_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @xvpickev_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } -// CHECK-LABEL: @xvpickev_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickev_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } -// CHECK-LABEL: @xvpickev_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickev_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } -// CHECK-LABEL: @xvpickev_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickev_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { 
+// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } -// CHECK-LABEL: @xvpickod_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } -// CHECK-LABEL: @xvpickod_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: 
[[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } -// CHECK-LABEL: @xvpickod_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// 
CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } -// CHECK-LABEL: @xvpickod_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } -// CHECK-LABEL: @xvilvh_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvh_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } -// CHECK-LABEL: @xvilvh_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvh_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } -// CHECK-LABEL: @xvilvh_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load 
<8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvh_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } -// CHECK-LABEL: @xvilvh_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvh_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x 
i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } -// CHECK-LABEL: @xvilvl_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } -// CHECK-LABEL: @xvilvl_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } -// CHECK-LABEL: @xvilvl_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } -// CHECK-LABEL: @xvilvl_d( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } -// CHECK-LABEL: @xvpackev_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } -// CHECK-LABEL: @xvpackev_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } -// CHECK-LABEL: @xvpackev_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } -// CHECK-LABEL: @xvpackev_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return 
__lasx_xvpackev_d(_1, _2); } -// CHECK-LABEL: @xvpackod_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } -// CHECK-LABEL: @xvpackod_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load 
<16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } -// CHECK-LABEL: @xvpackod_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } -// CHECK-LABEL: @xvpackod_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_d( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } -// CHECK-LABEL: @xvshuf_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } -// CHECK-LABEL: @xvshuf_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } -// CHECK-LABEL: @xvshuf_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } -// CHECK-LABEL: @xvshuf_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } -// CHECK-LABEL: @xvand_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvand_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 
xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } -// CHECK-LABEL: @xvandi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvandi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } -// CHECK-LABEL: @xvor_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvor_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } -// CHECK-LABEL: @xvori_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvori_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } -// CHECK-LABEL: @xvnor_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvnor_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> 
[[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } -// CHECK-LABEL: @xvnori_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvnori_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } -// CHECK-LABEL: @xvxor_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvxor_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } -// CHECK-LABEL: @xvxori_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvxori_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } -// CHECK-LABEL: @xvbitsel_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitsel_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } -// CHECK-LABEL: @xvbitseli_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseli_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: ret void // v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } -// CHECK-LABEL: @xvshuf4i_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } -// CHECK-LABEL: @xvshuf4i_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } -// 
CHECK-LABEL: @xvshuf4i_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } -// CHECK-LABEL: @xvreplgr2vr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplgr2vr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } -// CHECK-LABEL: @xvreplgr2vr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 
32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplgr2vr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } -// CHECK-LABEL: @xvreplgr2vr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplgr2vr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } -// CHECK-LABEL: @xvreplgr2vr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 +// CHECK-LABEL: define dso_local void @xvreplgr2vr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) -// CHECK-NEXT: 
store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } -// CHECK-LABEL: @xvpcnt_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpcnt_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } -// CHECK-LABEL: @xvpcnt_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpcnt_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } -// CHECK-LABEL: @xvpcnt_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpcnt_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } -// CHECK-LABEL: @xvpcnt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpcnt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } -// CHECK-LABEL: @xvclo_b( -// CHECK-NEXT: 
entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } -// CHECK-LABEL: @xvclo_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } -// CHECK-LABEL: @xvclo_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_w( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } -// CHECK-LABEL: @xvclo_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } -// CHECK-LABEL: @xvclz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } -// CHECK-LABEL: @xvclz_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } -// CHECK-LABEL: @xvclz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } -// CHECK-LABEL: @xvclz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } -// CHECK-LABEL: @xvfadd_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfadd_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } -// CHECK-LABEL: @xvfadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } -// CHECK-LABEL: @xvfsub_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvfsub_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } -// CHECK-LABEL: @xvfsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } -// CHECK-LABEL: @xvfmul_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmul_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } -// CHECK-LABEL: @xvfmul_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmul_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } -// CHECK-LABEL: @xvfdiv_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfdiv_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } -// CHECK-LABEL: @xvfdiv_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfdiv_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } -// CHECK-LABEL: @xvfcvt_h_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvt_h_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x 
float> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } -// CHECK-LABEL: @xvfcvt_s_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvt_s_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } -// CHECK-LABEL: @xvfmin_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmin_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } -// CHECK-LABEL: @xvfmin_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmin_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } -// CHECK-LABEL: 
@xvfmina_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmina_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } -// CHECK-LABEL: @xvfmina_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmina_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } -// CHECK-LABEL: @xvfmax_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmax_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } -// CHECK-LABEL: @xvfmax_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmax_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 
x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } -// CHECK-LABEL: @xvfmaxa_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmaxa_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: ret void // v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } -// CHECK-LABEL: @xvfmaxa_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmaxa_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } -// CHECK-LABEL: @xvfclass_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfclass_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) -// 
CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } -// CHECK-LABEL: @xvfclass_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfclass_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } -// CHECK-LABEL: @xvfsqrt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfsqrt_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } -// CHECK-LABEL: @xvfsqrt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfsqrt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } -// CHECK-LABEL: @xvfrecip_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrecip_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrecip_s(v8f32 _1) { 
return __lasx_xvfrecip_s(_1); } -// CHECK-LABEL: @xvfrecip_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrecip_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } -// CHECK-LABEL: @xvfrint_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrint_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } -// CHECK-LABEL: @xvfrint_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = 
load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrint_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } -// CHECK-LABEL: @xvfrsqrt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrsqrt_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } -// CHECK-LABEL: @xvfrsqrt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrsqrt_d( 
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } -// CHECK-LABEL: @xvflogb_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvflogb_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } -// CHECK-LABEL: @xvflogb_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvflogb_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } -// CHECK-LABEL: @xvfcvth_s_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvth_s_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } -// CHECK-LABEL: @xvfcvth_d_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvth_d_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } -// CHECK-LABEL: @xvfcvtl_s_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvtl_s_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } -// CHECK-LABEL: @xvfcvtl_d_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvtl_d_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } -// CHECK-LABEL: @xvftint_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } -// CHECK-LABEL: @xvftint_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail 
call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } -// CHECK-LABEL: @xvftint_wu_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_wu_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } -// CHECK-LABEL: @xvftint_lu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_lu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } -// CHECK-LABEL: @xvftintrz_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } -// CHECK-LABEL: @xvftintrz_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } -// CHECK-LABEL: @xvftintrz_wu_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_wu_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } -// CHECK-LABEL: @xvftintrz_lu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_lu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 
xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } -// CHECK-LABEL: @xvffint_s_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_s_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } -// CHECK-LABEL: @xvffint_d_l( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_d_l( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } -// CHECK-LABEL: @xvffint_s_wu( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_s_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } -// CHECK-LABEL: @xvffint_d_lu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_d_lu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } -// CHECK-LABEL: @xvreplve_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } -// CHECK-LABEL: @xvreplve_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: ret void // v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } -// CHECK-LABEL: @xvreplve_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } -// CHECK-LABEL: @xvreplve_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } -// CHECK-LABEL: @xvpermi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpermi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } -// CHECK-LABEL: @xvandn_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvandn_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } -// CHECK-LABEL: @xvneg_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } -// CHECK-LABEL: @xvneg_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } -// CHECK-LABEL: @xvneg_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } -// CHECK-LABEL: @xvneg_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load 
<4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } -// CHECK-LABEL: @xvmuh_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } -// CHECK-LABEL: @xvmuh_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } -// CHECK-LABEL: @xvmuh_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmuh_w(v8i32 _1, 
v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } -// CHECK-LABEL: @xvmuh_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } -// CHECK-LABEL: @xvmuh_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } -// CHECK-LABEL: @xvmuh_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } -// CHECK-LABEL: @xvmuh_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } -// CHECK-LABEL: @xvmuh_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 
xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } -// CHECK-LABEL: @xvsllwil_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } -// CHECK-LABEL: @xvsllwil_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } -// CHECK-LABEL: @xvsllwil_d_w( 
-// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } -// CHECK-LABEL: @xvsllwil_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } -// CHECK-LABEL: @xvsllwil_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } -// CHECK-LABEL: @xvsllwil_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } -// CHECK-LABEL: @xvsran_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsran_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } -// CHECK-LABEL: @xvsran_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsran_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } -// CHECK-LABEL: @xvsran_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsran_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } -// CHECK-LABEL: @xvssran_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } -// CHECK-LABEL: @xvssran_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } -// CHECK-LABEL: @xvssran_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x 
i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } -// CHECK-LABEL: @xvssran_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } -// CHECK-LABEL: @xvssran_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } -// CHECK-LABEL: @xvssran_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } -// CHECK-LABEL: @xvsrarn_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrarn_b_h(v16i16 
_1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } -// CHECK-LABEL: @xvsrarn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } -// CHECK-LABEL: @xvsrarn_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrarn_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } -// CHECK-LABEL: @xvssrarn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local 
void @xvssrarn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } -// CHECK-LABEL: @xvssrarn_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x 
i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrarn_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } -// CHECK-LABEL: @xvssrarn_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } -// CHECK-LABEL: @xvssrarn_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } -// CHECK-LABEL: @xvsrln_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrln_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } -// CHECK-LABEL: @xvsrln_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrln_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> 
[[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } -// CHECK-LABEL: @xvsrln_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrln_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } -// CHECK-LABEL: @xvssrln_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } -// CHECK-LABEL: @xvssrln_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } -// 
CHECK-LABEL: @xvssrln_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } -// CHECK-LABEL: @xvsrlrn_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } -// CHECK-LABEL: @xvsrlrn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } -// CHECK-LABEL: @xvsrlrn_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrlrn_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] 
// CHECK-NEXT: ret void // v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } -// CHECK-LABEL: @xvssrlrn_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } -// CHECK-LABEL: @xvssrlrn_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = 
load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } -// CHECK-LABEL: @xvfrstpi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrstpi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } -// CHECK-LABEL: @xvfrstpi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrstpi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } -// CHECK-LABEL: @xvfrstp_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrstp_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = 
load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } -// CHECK-LABEL: @xvfrstp_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrstp_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, 
_2, _3); } -// CHECK-LABEL: @xvshuf4i_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } -// CHECK-LABEL: @xvbsrl_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbsrl_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x 
i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } -// CHECK-LABEL: @xvbsll_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbsll_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } -// CHECK-LABEL: @xvextrins_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], 
i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } -// CHECK-LABEL: @xvextrins_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } -// CHECK-LABEL: @xvextrins_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } -// CHECK-LABEL: @xvextrins_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } -// CHECK-LABEL: 
@xvmskltz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } -// CHECK-LABEL: @xvmskltz_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } -// CHECK-LABEL: @xvmskltz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } -// CHECK-LABEL: @xvmskltz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } -// CHECK-LABEL: @xvsigncov_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local 
void @xvsigncov_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } -// CHECK-LABEL: @xvsigncov_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsigncov_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } -// CHECK-LABEL: @xvsigncov_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsigncov_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } -// CHECK-LABEL: @xvsigncov_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsigncov_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } -// CHECK-LABEL: @xvfmadd_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmadd_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] 
// CHECK-NEXT: ret void // v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } -// CHECK-LABEL: @xvfmadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } -// CHECK-LABEL: @xvfmsub_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvfmsub_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } -// CHECK-LABEL: @xvfmsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } -// CHECK-LABEL: @xvfnmadd_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmadd_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } -// CHECK-LABEL: @xvfnmadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } -// CHECK-LABEL: @xvfnmsub_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmsub_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } -// CHECK-LABEL: @xvfnmsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] 
= load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } -// CHECK-LABEL: @xvftintrne_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrne_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } -// CHECK-LABEL: @xvftintrne_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrne_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } -// CHECK-LABEL: @xvftintrp_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrp_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } -// CHECK-LABEL: @xvftintrp_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrp_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } -// CHECK-LABEL: @xvftintrm_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrm_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } -// CHECK-LABEL: @xvftintrm_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrm_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x 
double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } -// CHECK-LABEL: @xvftint_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } -// CHECK-LABEL: @xvffint_s_l( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_s_l( +// CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } -// CHECK-LABEL: @xvftintrz_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, 
!tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } -// CHECK-LABEL: @xvftintrp_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrp_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } -// CHECK-LABEL: @xvftintrm_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrm_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } -// CHECK-LABEL: @xvftintrne_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrne_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } -// CHECK-LABEL: @xvftinth_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvftinth_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } -// CHECK-LABEL: @xvftintl_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintl_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } -// CHECK-LABEL: @xvffinth_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffinth_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } -// CHECK-LABEL: @xvffintl_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffintl_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } -// CHECK-LABEL: @xvftintrzh_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrzh_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } -// CHECK-LABEL: @xvftintrzl_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrzl_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } -// CHECK-LABEL: @xvftintrph_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrph_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { 
+// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } -// CHECK-LABEL: @xvftintrpl_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrpl_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } -// CHECK-LABEL: @xvftintrmh_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrmh_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } -// CHECK-LABEL: @xvftintrml_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrml_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } -// CHECK-LABEL: @xvftintrneh_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrneh_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } -// CHECK-LABEL: @xvftintrnel_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrnel_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } -// CHECK-LABEL: @xvfrintrne_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrne_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> 
[[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } -// CHECK-LABEL: @xvfrintrne_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrne_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } -// CHECK-LABEL: @xvfrintrz_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrz_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } -// CHECK-LABEL: @xvfrintrz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrz_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } -// CHECK-LABEL: @xvfrintrp_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrp_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 
xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } -// CHECK-LABEL: @xvfrintrp_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrp_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } -// CHECK-LABEL: @xvfrintrm_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrm_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } -// CHECK-LABEL: @xvfrintrm_d( -// CHECK-NEXT: 
entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrm_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } -// CHECK-LABEL: @xvld( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvld( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } -// CHECK-LABEL: @xvst( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) +// CHECK-LABEL: define dso_local void @xvst( +// CHECK-SAME: ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2]], i32 1) // CHECK-NEXT: ret void // void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } -// CHECK-LABEL: @xvstelm_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_b( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2]], i32 1, i32 1) // CHECK-NEXT: ret void // void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } -// CHECK-LABEL: @xvstelm_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_h( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2]], i32 2, i32 1) // CHECK-NEXT: ret void // void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } -// CHECK-LABEL: 
@xvstelm_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2]], i32 4, i32 1) // CHECK-NEXT: ret void // void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } -// CHECK-LABEL: @xvstelm_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2]], i32 8, i32 1) // CHECK-NEXT: ret void // void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } -// CHECK-LABEL: @xvinsve0_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsve0_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } -// CHECK-LABEL: @xvinsve0_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsve0_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } -// CHECK-LABEL: @xvpickve_w( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } -// CHECK-LABEL: @xvpickve_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } -// CHECK-LABEL: @xvssrlrn_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// 
CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } -// CHECK-LABEL: @xvssrlrn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], 
<8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } -// CHECK-LABEL: @xvssrlrn_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrln_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } -// CHECK-LABEL: @xvssrln_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } -// CHECK-LABEL: 
@xvssrln_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } -// CHECK-LABEL: @xvorn_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvorn_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } -// CHECK-LABEL: @xvldi( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvldi( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvldi() { return __lasx_xvldi(1); } -// CHECK-LABEL: @xvldx( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]] -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldx( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1]], i64 1), !noalias [[META5:![0-9]+]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } -// CHECK-LABEL: @xvstx( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 
32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1) +// CHECK-LABEL: define dso_local void @xvstx( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2]], i64 1) // CHECK-NEXT: ret void // void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } -// CHECK-LABEL: @xvextl_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextl_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } -// CHECK-LABEL: @xvinsgr2vr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsgr2vr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } -// CHECK-LABEL: @xvinsgr2vr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsgr2vr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } -// CHECK-LABEL: @xvreplve0_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } -// CHECK-LABEL: @xvreplve0_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } -// CHECK-LABEL: @xvreplve0_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } -// CHECK-LABEL: @xvreplve0_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } -// CHECK-LABEL: @xvreplve0_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } -// CHECK-LABEL: @vext2xv_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } -// CHECK-LABEL: @vext2xv_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: ret void // v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } -// CHECK-LABEL: @vext2xv_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } -// CHECK-LABEL: @vext2xv_w_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_w_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } -// CHECK-LABEL: @vext2xv_d_h( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_d_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } -// CHECK-LABEL: @vext2xv_d_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_d_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } -// CHECK-LABEL: @vext2xv_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @vext2xv_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } -// CHECK-LABEL: @vext2xv_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } -// CHECK-LABEL: @vext2xv_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } -// CHECK-LABEL: @vext2xv_wu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_wu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } -// CHECK-LABEL: @vext2xv_du_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_du_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } -// CHECK-LABEL: @vext2xv_du_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_du_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } -// CHECK-LABEL: @xvpermi_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpermi_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } -// CHECK-LABEL: @xvpermi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpermi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } -// CHECK-LABEL: @xvperm_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvperm_w( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } -// CHECK-LABEL: @xvldrepl_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldrepl_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } -// CHECK-LABEL: @xvldrepl_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @xvldrepl_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1]], i32 2) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } -// CHECK-LABEL: @xvldrepl_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldrepl_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1]], i32 4) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } -// CHECK-LABEL: @xvldrepl_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldrepl_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1]], i32 
8) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } -// CHECK-LABEL: @xvpickve2gr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } -// CHECK-LABEL: @xvpickve2gr_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_wu( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } -// CHECK-LABEL: @xvpickve2gr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local i64 @xvpickve2gr_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 
@llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } -// CHECK-LABEL: @xvpickve2gr_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local i64 @xvpickve2gr_du( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } -// CHECK-LABEL: @xvaddwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { 
return __lasx_xvaddwev_q_d(_1, _2); } -// CHECK-LABEL: @xvaddwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } -// CHECK-LABEL: @xvaddwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } -// CHECK-LABEL: @xvaddwev_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } -// CHECK-LABEL: @xvaddwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvaddwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } -// CHECK-LABEL: @xvaddwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 
x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } -// CHECK-LABEL: @xvaddwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } -// CHECK-LABEL: @xvaddwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } -// CHECK-LABEL: @xvsubwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } -// CHECK-LABEL: @xvsubwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } -// CHECK-LABEL: @xvsubwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> 
[[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } -// CHECK-LABEL: @xvsubwev_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } -// CHECK-LABEL: @xvsubwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } -// CHECK-LABEL: @xvsubwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return 
__lasx_xvsubwev_d_wu(_1, _2); } -// CHECK-LABEL: @xvsubwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } -// CHECK-LABEL: @xvsubwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } -// CHECK-LABEL: @xvmulwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } -// CHECK-LABEL: @xvmulwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvmulwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } -// CHECK-LABEL: @xvmulwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } -// CHECK-LABEL: @xvmulwev_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } -// CHECK-LABEL: @xvmulwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } -// CHECK-LABEL: @xvmulwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } -// CHECK-LABEL: @xvmulwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } -// CHECK-LABEL: @xvmulwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } -// CHECK-LABEL: @xvaddwod_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } -// CHECK-LABEL: @xvaddwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } -// CHECK-LABEL: @xvaddwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_h(v16i16 _1, v16i16 
_2) { return __lasx_xvaddwod_w_h(_1, _2); } -// CHECK-LABEL: @xvaddwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } -// CHECK-LABEL: @xvaddwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } -// CHECK-LABEL: @xvaddwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } -// CHECK-LABEL: @xvaddwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @xvaddwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } -// CHECK-LABEL: @xvaddwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } -// CHECK-LABEL: @xvsubwod_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } -// CHECK-LABEL: @xvsubwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } -// CHECK-LABEL: @xvsubwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } -// CHECK-LABEL: @xvsubwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } -// CHECK-LABEL: @xvsubwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } -// CHECK-LABEL: @xvsubwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } -// CHECK-LABEL: @xvsubwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } -// CHECK-LABEL: @xvsubwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // 
v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } -// CHECK-LABEL: @xvmulwod_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } -// CHECK-LABEL: @xvmulwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } -// CHECK-LABEL: @xvmulwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } -// CHECK-LABEL: @xvmulwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvmulwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } -// CHECK-LABEL: @xvmulwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } -// CHECK-LABEL: @xvmulwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } -// CHECK-LABEL: @xvmulwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } -// CHECK-LABEL: @xvmulwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } -// CHECK-LABEL: @xvaddwev_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvaddwev_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvaddwev_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvmulwev_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_d_wu_w( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvmulwev_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvmulwev_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvaddwod_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvaddwod_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvaddwod_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvmulwod_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvmulwod_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvmulwod_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_h_bu_b( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvhaddw_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } -// CHECK-LABEL: @xvhaddw_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } -// CHECK-LABEL: @xvhsubw_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } -// CHECK-LABEL: @xvhsubw_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } -// CHECK-LABEL: @xvmaddwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> 
[[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_q_d( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } -// 
CHECK-LABEL: @xvmaddwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmaddwod_h_bu(v16u16 _1, 
v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] 
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -// CHECK-NEXT: store <16 x i16> 
[[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } -// CHECK-LABEL: @xvrotr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } -// CHECK-LABEL: @xvrotr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } -// CHECK-LABEL: @xvrotr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } -// CHECK-LABEL: @xvrotr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } -// CHECK-LABEL: @xvadd_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 
x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } -// CHECK-LABEL: @xvsub_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } -// CHECK-LABEL: @xvaddwev_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } -// CHECK-LABEL: @xvaddwod_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } -// CHECK-LABEL: 
@xvmulwev_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } -// CHECK-LABEL: @xvmulwod_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, 
!tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } -// CHECK-LABEL: @xvmskgez_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskgez_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } -// CHECK-LABEL: @xvmsknz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsknz_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } -// CHECK-LABEL: @xvexth_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } -// CHECK-LABEL: @xvexth_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } -// CHECK-LABEL: @xvexth_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } -// CHECK-LABEL: @xvexth_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // 
v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } -// CHECK-LABEL: @xvexth_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } -// CHECK-LABEL: @xvexth_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } -// CHECK-LABEL: @xvexth_du_wu( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } -// CHECK-LABEL: @xvexth_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } -// CHECK-LABEL: @xvrotri_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvrotri_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } -// CHECK-LABEL: @xvrotri_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotri_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } -// CHECK-LABEL: @xvrotri_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotri_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } -// CHECK-LABEL: @xvrotri_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotri_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } -// CHECK-LABEL: @xvextl_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextl_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } -// CHECK-LABEL: @xvsrlni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrlni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: 
define dso_local void @xvsrlni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrlni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrlni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x 
i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return 
__lasx_xvssrlni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = 
load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvssrlni_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 
32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret 
void // v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrani_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrani_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrani_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-LABEL: define dso_local void @xvsrani_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 
xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 
32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_hu_w( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } -// CHECK-LABEL: @xbnz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_b( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } -// CHECK-LABEL: @xbnz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } -// CHECK-LABEL: @xbnz_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_h( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } -// CHECK-LABEL: @xbnz_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_v( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> 
[[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } -// CHECK-LABEL: @xbnz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } -// CHECK-LABEL: @xbz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_b( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } -// CHECK-LABEL: @xbz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } -// CHECK-LABEL: @xbz_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_h( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } -// CHECK-LABEL: @xbz_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_v( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } -// CHECK-LABEL: @xbz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } -// CHECK-LABEL: @xvfcmp_caf_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_caf_d( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_caf_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_caf_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_ceq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_ceq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_ceq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_ceq_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cle_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cle_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cle_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cle_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_clt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_clt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> 
[[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_clt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_clt_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cne_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cne_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cne_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cne_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); 
} -// CHECK-LABEL: @xvfcmp_cor_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cor_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cor_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cor_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, 
ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cueq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cueq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cueq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cueq_s( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cule_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cule_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cule_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cule_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cult_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cult_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { 
+// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cult_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cult_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cun_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cun_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cune_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cune_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cune_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cune_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cun_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cun_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_saf_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_saf_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 
xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_saf_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_saf_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_seq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_seq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_seq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_seq_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sle_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvfcmp_sle_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sle_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sle_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_slt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_slt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_slt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_slt_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sne_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sne_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sne_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x 
float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sne_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sor_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sor_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sor_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sor_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sueq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sueq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sueq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sueq_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 
xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sule_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sule_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sule_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sule_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sult_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sult_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sult_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sult_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sun_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sun_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sune_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sune_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sune_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sune_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sun_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sun_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } -// CHECK-LABEL: @xvpickve_d_f( -// CHECK-NEXT: 
entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_d_f( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } -// CHECK-LABEL: @xvpickve_w_f( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_w_f( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } -// CHECK-LABEL: @xvrepli_b( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_b( +// CHECK-SAME: 
ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } -// CHECK-LABEL: @xvrepli_d( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } -// CHECK-LABEL: @xvrepli_h( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } -// CHECK-LABEL: @xvrepli_w( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_w( +// CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +//. +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META5]] = !{[[META6:![0-9]+]]} +// CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]], !"__lasx_xvldx: %agg.result"} +// CHECK: [[META7]] = distinct !{[[META7]], !"__lasx_xvldx"} +//. diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c index b79f939403993..b194ea8f3182a 100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c @@ -1,37 +1,46 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s #include -// CHECK-LABEL: @xvfrecipe_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrecipe_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr 
#[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrecipe_s(v8f32 _1) { return __lasx_xvfrecipe_s(_1); } -// CHECK-LABEL: @xvfrecipe_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrecipe_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrecipe_d(v4f64 _1) { return __lasx_xvfrecipe_d(_1); } -// CHECK-LABEL: @xvfrsqrte_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrsqrte_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] 
= load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrsqrte_s(v8f32 _1) { return __lasx_xvfrsqrte_s(_1); } -// CHECK-LABEL: @xvfrsqrte_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrsqrte_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrsqrte_d(v4f64 _1) { return __lasx_xvfrsqrte_d(_1); } +//. +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c index 63e9ba639ea2c..9d543dfabe3d2 100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c @@ -1,38 +1,47 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s typedef float v8f32 __attribute__((vector_size(32), aligned(32))); typedef double v4f64 __attribute__((vector_size(32), aligned(32))); -// CHECK-LABEL: @xvfrecipe_s -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrecipe_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrecipe_s(v8f32 _1) { return __builtin_lasx_xvfrecipe_s(_1); } -// CHECK-LABEL: @xvfrecipe_d -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrecipe_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrecipe_d(v4f64 _1) { return __builtin_lasx_xvfrecipe_d(_1); } -// CHECK-LABEL: @xvfrsqrte_s -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrsqrte_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrsqrte_s(v8f32 _1) { return __builtin_lasx_xvfrsqrte_s(_1); } -// CHECK-LABEL: @xvfrsqrte_d -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @xvfrsqrte_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrsqrte_d(v4f64 _1) { return __builtin_lasx_xvfrsqrte_d(_1); } +//. +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c index f52a23a5faea7..9b21c7ea3e8a5 100644 --- a/clang/test/CodeGen/LoongArch/lasx/builtin.c +++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); @@ -25,6384 +25,7125 @@ typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); typedef double v4f64 __attribute__((vector_size(32), aligned(32))); typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); -// CHECK-LABEL: @xvsll_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_b( +// CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } -// CHECK-LABEL: @xvsll_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } -// CHECK-LABEL: @xvsll_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } -// CHECK-LABEL: @xvsll_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsll_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } -// CHECK-LABEL: @xvslli_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } -// CHECK-LABEL: @xvslli_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } -// CHECK-LABEL: @xvslli_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } -// CHECK-LABEL: @xvslli_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslli_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) -// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } -// CHECK-LABEL: @xvsra_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsra_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } -// CHECK-LABEL: @xvsra_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsra_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } -// CHECK-LABEL: @xvsra_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsra_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } -// CHECK-LABEL: @xvsra_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 
32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsra_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } -// CHECK-LABEL: @xvsrai_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrai_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrai_b(v32i8 _1) { return 
__builtin_lasx_xvsrai_b(_1, 1); } -// CHECK-LABEL: @xvsrai_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrai_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } -// CHECK-LABEL: @xvsrai_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrai_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } -// CHECK-LABEL: @xvsrai_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load 
<4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrai_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } -// CHECK-LABEL: @xvsrar_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrar_b(v32i8 _1, 
v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } -// CHECK-LABEL: @xvsrar_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } -// CHECK-LABEL: @xvsrar_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] 
= load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } -// CHECK-LABEL: @xvsrar_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrar_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } -// CHECK-LABEL: @xvsrari_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } -// CHECK-LABEL: @xvsrari_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } -// CHECK-LABEL: @xvsrari_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } -// CHECK-LABEL: @xvsrari_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrari_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } -// CHECK-LABEL: @xvsrl_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } -// CHECK-LABEL: @xvsrl_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } -// CHECK-LABEL: @xvsrl_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } -// CHECK-LABEL: @xvsrl_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrl_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 
x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } -// CHECK-LABEL: @xvsrli_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } -// CHECK-LABEL: @xvsrli_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } -// CHECK-LABEL: @xvsrli_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } -// CHECK-LABEL: @xvsrli_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrli_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrli_d(v4i64 _1) { return 
__builtin_lasx_xvsrli_d(_1, 1); } -// CHECK-LABEL: @xvsrlr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } -// CHECK-LABEL: @xvsrlr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr 
[[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } -// CHECK-LABEL: @xvsrlr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } -// CHECK-LABEL: @xvsrlr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } -// CHECK-LABEL: @xvsrlri_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } -// CHECK-LABEL: @xvsrlri_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_h( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } -// CHECK-LABEL: @xvsrlri_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } -// CHECK-LABEL: @xvsrlri_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlri_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } -// CHECK-LABEL: @xvbitclr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } -// CHECK-LABEL: @xvbitclr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } -// CHECK-LABEL: @xvbitclr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) 
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } -// CHECK-LABEL: @xvbitclr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } -// CHECK-LABEL: @xvbitclri_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] 
= load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } -// CHECK-LABEL: @xvbitclri_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } -// CHECK-LABEL: @xvbitclri_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] 
= tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } -// CHECK-LABEL: @xvbitclri_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitclri_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } -// CHECK-LABEL: @xvbitset_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitset_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } -// CHECK-LABEL: @xvbitset_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitset_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } -// CHECK-LABEL: @xvbitset_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: 
define dso_local void @xvbitset_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } -// CHECK-LABEL: @xvbitset_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitset_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x 
i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } -// CHECK-LABEL: @xvbitseti_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } -// CHECK-LABEL: @xvbitseti_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
ret void // v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } -// CHECK-LABEL: @xvbitseti_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } -// CHECK-LABEL: @xvbitseti_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseti_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } -// 
CHECK-LABEL: @xvbitrev_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } -// CHECK-LABEL: @xvbitrev_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } -// CHECK-LABEL: @xvbitrev_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } -// CHECK-LABEL: @xvbitrev_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrev_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 
x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } -// CHECK-LABEL: @xvbitrevi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrevi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } -// CHECK-LABEL: @xvbitrevi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvbitrevi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } -// CHECK-LABEL: @xvbitrevi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrevi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } -// CHECK-LABEL: @xvbitrevi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitrevi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } -// CHECK-LABEL: @xvadd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } -// CHECK-LABEL: @xvadd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } -// CHECK-LABEL: @xvadd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// 
CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } -// CHECK-LABEL: @xvadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } -// CHECK-LABEL: @xvaddi_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } -// CHECK-LABEL: @xvaddi_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } -// CHECK-LABEL: @xvaddi_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } -// CHECK-LABEL: @xvaddi_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddi_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } -// CHECK-LABEL: @xvsub_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } -// CHECK-LABEL: @xvsub_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } -// CHECK-LABEL: @xvsub_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_w( +// CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } -// CHECK-LABEL: @xvsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void 
// v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } -// CHECK-LABEL: @xvsubi_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } -// CHECK-LABEL: @xvsubi_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } -// CHECK-LABEL: @xvsubi_wu( 
-// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } -// CHECK-LABEL: @xvsubi_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubi_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } -// CHECK-LABEL: @xvmax_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// 
CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } -// CHECK-LABEL: @xvmax_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x 
i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } -// CHECK-LABEL: @xvmax_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } -// CHECK-LABEL: @xvmax_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } -// CHECK-LABEL: @xvmaxi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } -// CHECK-LABEL: @xvmaxi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } -// CHECK-LABEL: @xvmaxi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } -// CHECK-LABEL: @xvmaxi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } -// CHECK-LABEL: @xvmax_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } -// CHECK-LABEL: @xvmax_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } -// CHECK-LABEL: @xvmax_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmax_wu(v8u32 _1, v8u32 
_2) { return __builtin_lasx_xvmax_wu(_1, _2); } -// CHECK-LABEL: @xvmax_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmax_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } -// CHECK-LABEL: @xvmaxi_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } -// CHECK-LABEL: @xvmaxi_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } -// CHECK-LABEL: @xvmaxi_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
ret void // v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } -// CHECK-LABEL: @xvmaxi_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaxi_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } -// CHECK-LABEL: @xvmin_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } -// CHECK-LABEL: @xvmin_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } -// CHECK-LABEL: @xvmin_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } -// CHECK-LABEL: @xvmin_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } -// CHECK-LABEL: @xvmini_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: 
define dso_local void @xvmini_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } -// CHECK-LABEL: @xvmini_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } -// CHECK-LABEL: @xvmini_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } -// CHECK-LABEL: @xvmini_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } -// CHECK-LABEL: @xvmin_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } -// CHECK-LABEL: @xvmin_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return 
__builtin_lasx_xvmin_hu(_1, _2); } -// CHECK-LABEL: @xvmin_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } -// CHECK-LABEL: @xvmin_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmin_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } -// CHECK-LABEL: @xvmini_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } -// CHECK-LABEL: @xvmini_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } -// CHECK-LABEL: @xvmini_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } -// CHECK-LABEL: @xvmini_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmini_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } -// CHECK-LABEL: @xvseq_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } -// CHECK-LABEL: @xvseq_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } -// CHECK-LABEL: @xvseq_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } -// CHECK-LABEL: @xvseq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// 
CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } -// CHECK-LABEL: @xvseqi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } -// 
CHECK-LABEL: @xvseqi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } -// CHECK-LABEL: @xvseqi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } -// CHECK-LABEL: @xvseqi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvseqi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } -// CHECK-LABEL: @xvslt_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, 
_2); } -// CHECK-LABEL: @xvslt_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } -// CHECK-LABEL: @xvslt_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } -// CHECK-LABEL: @xvslt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } -// CHECK-LABEL: @xvslti_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } -// CHECK-LABEL: @xvslti_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } -// CHECK-LABEL: @xvslti_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } -// CHECK-LABEL: @xvslti_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } -// CHECK-LABEL: @xvslt_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } -// CHECK-LABEL: @xvslt_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } -// CHECK-LABEL: @xvslt_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } -// CHECK-LABEL: @xvslt_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslt_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } -// CHECK-LABEL: @xvslti_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } -// CHECK-LABEL: @xvslti_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
ret void // v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } -// CHECK-LABEL: @xvslti_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } -// CHECK-LABEL: @xvslti_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslti_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } -// CHECK-LABEL: @xvsle_b( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } -// CHECK-LABEL: @xvsle_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } -// CHECK-LABEL: @xvsle_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } -// CHECK-LABEL: @xvsle_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } -// CHECK-LABEL: @xvslei_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } -// CHECK-LABEL: @xvslei_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } -// CHECK-LABEL: @xvslei_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } -// CHECK-LABEL: @xvslei_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } -// CHECK-LABEL: @xvsle_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } -// CHECK-LABEL: @xvsle_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @xvsle_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } -// CHECK-LABEL: @xvsle_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } -// CHECK-LABEL: @xvsle_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsle_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } -// CHECK-LABEL: @xvslei_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } -// CHECK-LABEL: @xvslei_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } -// CHECK-LABEL: @xvslei_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } -// CHECK-LABEL: @xvslei_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvslei_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } -// CHECK-LABEL: @xvsat_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } -// CHECK-LABEL: @xvsat_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } -// CHECK-LABEL: @xvsat_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } -// CHECK-LABEL: 
@xvsat_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } -// CHECK-LABEL: @xvsat_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } -// CHECK-LABEL: @xvsat_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvsat_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } -// CHECK-LABEL: @xvsat_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } -// CHECK-LABEL: @xvsat_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsat_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x 
i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } -// CHECK-LABEL: @xvadda_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } -// CHECK-LABEL: @xvadda_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } -// CHECK-LABEL: @xvadda_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) 
-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } -// CHECK-LABEL: @xvadda_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadda_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } -// CHECK-LABEL: @xvsadd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } -// CHECK-LABEL: @xvsadd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } -// CHECK-LABEL: @xvsadd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x 
i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } -// CHECK-LABEL: @xvsadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> 
[[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } -// CHECK-LABEL: @xvsadd_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } -// CHECK-LABEL: @xvsadd_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } -// CHECK-LABEL: @xvsadd_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } -// CHECK-LABEL: @xvsadd_du( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsadd_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } -// CHECK-LABEL: @xvavg_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } -// CHECK-LABEL: @xvavg_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } -// CHECK-LABEL: @xvavg_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } -// CHECK-LABEL: @xvavg_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } -// CHECK-LABEL: @xvavg_bu( 
-// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } -// CHECK-LABEL: @xvavg_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] 
= tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } -// CHECK-LABEL: @xvavg_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } -// CHECK-LABEL: @xvavg_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavg_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } -// CHECK-LABEL: @xvavgr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } 
-// CHECK-LABEL: @xvavgr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } -// CHECK-LABEL: @xvavgr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } -// CHECK-LABEL: @xvavgr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } -// CHECK-LABEL: @xvavgr_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } -// CHECK-LABEL: @xvavgr_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvavgr_hu(v16u16 
_1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } -// CHECK-LABEL: @xvavgr_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } -// CHECK-LABEL: @xvavgr_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvavgr_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] 
= load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } -// CHECK-LABEL: @xvssub_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } -// CHECK-LABEL: @xvssub_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_h( +// CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } -// CHECK-LABEL: @xvssub_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
ret void // v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } -// CHECK-LABEL: @xvssub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } -// CHECK-LABEL: @xvssub_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } -// CHECK-LABEL: @xvssub_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } -// CHECK-LABEL: @xvssub_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_wu( 
+// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } -// CHECK-LABEL: @xvssub_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssub_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 
32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } -// CHECK-LABEL: @xvabsd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } -// CHECK-LABEL: @xvabsd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, 
ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } -// CHECK-LABEL: @xvabsd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } -// CHECK-LABEL: @xvabsd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: 
define dso_local void @xvabsd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } -// CHECK-LABEL: @xvabsd_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } -// CHECK-LABEL: @xvabsd_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } -// CHECK-LABEL: @xvabsd_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } -// CHECK-LABEL: @xvabsd_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvabsd_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } -// CHECK-LABEL: @xvmul_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } -// CHECK-LABEL: @xvmul_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } -// CHECK-LABEL: @xvmul_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } -// CHECK-LABEL: @xvmul_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmul_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } -// CHECK-LABEL: @xvmadd_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 
xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } -// CHECK-LABEL: @xvmadd_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } -// CHECK-LABEL: @xvmadd_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_w( +// CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } -// CHECK-LABEL: @xvmadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return 
__builtin_lasx_xvmsub_b(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } -// CHECK-LABEL: @xvmsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = 
load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } -// CHECK-LABEL: @xvdiv_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } -// CHECK-LABEL: @xvdiv_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_h( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } -// CHECK-LABEL: @xvdiv_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } -// CHECK-LABEL: @xvdiv_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } -// CHECK-LABEL: @xvdiv_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } -// CHECK-LABEL: @xvdiv_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } -// CHECK-LABEL: @xvdiv_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvdiv_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } -// CHECK-LABEL: @xvdiv_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvdiv_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } -// CHECK-LABEL: @xvhaddw_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } -// CHECK-LABEL: @xvhaddw_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } -// CHECK-LABEL: @xvhaddw_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } -// CHECK-LABEL: @xvhaddw_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } -// CHECK-LABEL: @xvhaddw_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } -// CHECK-LABEL: @xvhaddw_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } -// CHECK-LABEL: @xvhsubw_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } -// CHECK-LABEL: @xvhsubw_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } -// CHECK-LABEL: @xvhsubw_d_w( -// CHECK-NEXT: 
entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } -// CHECK-LABEL: @xvhsubw_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } -// CHECK-LABEL: @xvhsubw_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } -// CHECK-LABEL: @xvhsubw_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } -// CHECK-LABEL: @xvmod_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 
xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } -// CHECK-LABEL: @xvmod_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } -// CHECK-LABEL: @xvmod_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } -// CHECK-LABEL: @xvmod_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } -// CHECK-LABEL: @xvmod_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_bu( +// CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } -// CHECK-LABEL: @xvmod_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
ret void // v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } -// CHECK-LABEL: @xvmod_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } -// CHECK-LABEL: @xvmod_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmod_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] 
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } -// CHECK-LABEL: @xvrepl128vei_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrepl128vei_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } -// CHECK-LABEL: @xvrepl128vei_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrepl128vei_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } -// CHECK-LABEL: @xvrepl128vei_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrepl128vei_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } -// CHECK-LABEL: @xvrepl128vei_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrepl128vei_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = 
tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } -// CHECK-LABEL: @xvpickev_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickev_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } -// CHECK-LABEL: @xvpickev_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickev_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } -// CHECK-LABEL: @xvpickev_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickev_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return 
__builtin_lasx_xvpickev_w(_1, _2); } -// CHECK-LABEL: @xvpickev_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickev_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } -// CHECK-LABEL: @xvpickod_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x 
i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } -// CHECK-LABEL: @xvpickod_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } -// CHECK-LABEL: @xvpickod_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_w( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } -// CHECK-LABEL: @xvpickod_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickod_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } -// CHECK-LABEL: @xvilvh_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvh_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } -// CHECK-LABEL: @xvilvh_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvh_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } -// CHECK-LABEL: @xvilvh_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvh_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } -// CHECK-LABEL: @xvilvh_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: 
define dso_local void @xvilvh_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } -// CHECK-LABEL: @xvilvl_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } -// CHECK-LABEL: @xvilvl_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } -// CHECK-LABEL: @xvilvl_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } -// CHECK-LABEL: @xvilvl_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvilvl_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } -// CHECK-LABEL: @xvpackev_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } -// CHECK-LABEL: @xvpackev_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } -// CHECK-LABEL: @xvpackev_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } -// CHECK-LABEL: @xvpackev_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackev_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } -// CHECK-LABEL: @xvpackod_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } -// CHECK-LABEL: @xvpackod_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } -// CHECK-LABEL: @xvpackod_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> 
[[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } -// CHECK-LABEL: @xvpackod_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpackod_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } -// CHECK-LABEL: @xvshuf_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } -// CHECK-LABEL: @xvshuf_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr 
[[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } -// CHECK-LABEL: @xvshuf_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } -// CHECK-LABEL: @xvshuf_d( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } -// CHECK-LABEL: @xvand_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvand_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } -// CHECK-LABEL: @xvandi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvandi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } -// CHECK-LABEL: @xvor_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvor_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } -// CHECK-LABEL: @xvori_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvori_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } -// CHECK-LABEL: @xvnor_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvnor_v( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } -// CHECK-LABEL: @xvnori_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvnori_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } -// CHECK-LABEL: @xvxor_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] 
= load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvxor_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } -// CHECK-LABEL: @xvxori_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvxori_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } -// CHECK-LABEL: @xvbitsel_v( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitsel_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } -// CHECK-LABEL: @xvbitseli_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbitseli_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } -// CHECK-LABEL: @xvshuf4i_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } -// CHECK-LABEL: @xvshuf4i_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } -// CHECK-LABEL: @xvshuf4i_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } -// CHECK-LABEL: @xvreplgr2vr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplgr2vr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef 
signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } -// CHECK-LABEL: @xvreplgr2vr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplgr2vr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } -// CHECK-LABEL: @xvreplgr2vr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplgr2vr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 
xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } -// CHECK-LABEL: @xvreplgr2vr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 +// CHECK-LABEL: define dso_local void @xvreplgr2vr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i32 noundef signext [[_1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } -// CHECK-LABEL: @xvpcnt_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpcnt_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } -// CHECK-LABEL: @xvpcnt_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: 
define dso_local void @xvpcnt_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } -// CHECK-LABEL: @xvpcnt_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpcnt_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } -// CHECK-LABEL: @xvpcnt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpcnt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } -// CHECK-LABEL: @xvclo_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } -// CHECK-LABEL: @xvclo_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } -// CHECK-LABEL: @xvclo_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } -// CHECK-LABEL: @xvclo_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclo_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x 
i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } -// CHECK-LABEL: @xvclz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } -// CHECK-LABEL: @xvclz_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } -// CHECK-LABEL: @xvclz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } -// CHECK-LABEL: @xvclz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvclz_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvclz_d(v4i64 _1) { return 
__builtin_lasx_xvclz_d(_1); } -// CHECK-LABEL: @xvfadd_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfadd_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } -// CHECK-LABEL: @xvfadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = 
load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } -// CHECK-LABEL: @xvfsub_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfsub_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } -// CHECK-LABEL: @xvfsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfsub_d( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } -// CHECK-LABEL: @xvfmul_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmul_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } -// CHECK-LABEL: @xvfmul_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmul_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } -// CHECK-LABEL: @xvfdiv_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfdiv_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } -// CHECK-LABEL: @xvfdiv_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfdiv_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } -// CHECK-LABEL: @xvfcvt_h_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvt_h_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } -// CHECK-LABEL: @xvfcvt_s_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvt_s_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x 
double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } -// CHECK-LABEL: @xvfmin_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmin_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } -// CHECK-LABEL: @xvfmin_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmin_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } -// CHECK-LABEL: @xvfmina_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmina_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfmina_s(_1, _2); } -// CHECK-LABEL: @xvfmina_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmina_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } -// CHECK-LABEL: @xvfmax_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmax_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: 
[[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } -// CHECK-LABEL: @xvfmax_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmax_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } -// CHECK-LABEL: @xvfmaxa_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvfmaxa_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } -// CHECK-LABEL: @xvfmaxa_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmaxa_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } -// CHECK-LABEL: @xvfclass_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfclass_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } -// CHECK-LABEL: @xvfclass_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfclass_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: ret void // v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } -// CHECK-LABEL: @xvfsqrt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfsqrt_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } -// CHECK-LABEL: @xvfsqrt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfsqrt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } -// 
CHECK-LABEL: @xvfrecip_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrecip_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } -// CHECK-LABEL: @xvfrecip_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrecip_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } -// CHECK-LABEL: @xvfrint_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrint_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } -// CHECK-LABEL: @xvfrint_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrint_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } -// CHECK-LABEL: @xvfrsqrt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrsqrt_s( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } -// CHECK-LABEL: @xvfrsqrt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrsqrt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } -// CHECK-LABEL: @xvflogb_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvflogb_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } -// CHECK-LABEL: @xvflogb_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvflogb_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } -// CHECK-LABEL: @xvfcvth_s_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvth_s_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } -// CHECK-LABEL: @xvfcvth_d_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvth_d_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } -// CHECK-LABEL: @xvfcvtl_s_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvtl_s_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } -// CHECK-LABEL: @xvfcvtl_d_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcvtl_d_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } -// CHECK-LABEL: @xvftint_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } -// CHECK-LABEL: @xvftint_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } -// CHECK-LABEL: @xvftint_wu_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_wu_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 
x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } -// CHECK-LABEL: @xvftint_lu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_lu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } -// CHECK-LABEL: @xvftintrz_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 
32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } -// CHECK-LABEL: @xvftintrz_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } -// CHECK-LABEL: @xvftintrz_wu_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_wu_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } -// CHECK-LABEL: @xvftintrz_lu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_lu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } -// CHECK-LABEL: @xvffint_s_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_s_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvffint_s_w(v8i32 
_1) { return __builtin_lasx_xvffint_s_w(_1); } -// CHECK-LABEL: @xvffint_d_l( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_d_l( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } -// CHECK-LABEL: @xvffint_s_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_s_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } -// CHECK-LABEL: @xvffint_d_lu( -// CHECK-NEXT: 
entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_d_lu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } -// CHECK-LABEL: @xvreplve_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvreplve_b(v32i8 _1, int _2) { return 
__builtin_lasx_xvreplve_b(_1, _2); } -// CHECK-LABEL: @xvreplve_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } -// CHECK-LABEL: @xvreplve_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x 
i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } -// CHECK-LABEL: @xvreplve_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i32 noundef signext [[_2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } -// CHECK-LABEL: @xvpermi_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpermi_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } -// CHECK-LABEL: @xvandn_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvandn_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } -// CHECK-LABEL: @xvneg_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_b( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } -// CHECK-LABEL: @xvneg_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } -// CHECK-LABEL: @xvneg_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } -// CHECK-LABEL: @xvneg_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvneg_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } -// CHECK-LABEL: @xvmuh_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } -// CHECK-LABEL: @xvmuh_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } -// CHECK-LABEL: @xvmuh_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load 
<8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } -// CHECK-LABEL: @xvmuh_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> 
[[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } -// CHECK-LABEL: @xvmuh_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } -// CHECK-LABEL: @xvmuh_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } -// CHECK-LABEL: @xvmuh_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } -// CHECK-LABEL: @xvmuh_du( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmuh_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } -// CHECK-LABEL: @xvsllwil_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } -// CHECK-LABEL: @xvsllwil_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } -// CHECK-LABEL: @xvsllwil_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsllwil_d_w(v8i32 _1) { return 
__builtin_lasx_xvsllwil_d_w(_1, 1); } -// CHECK-LABEL: @xvsllwil_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } -// CHECK-LABEL: @xvsllwil_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } -// CHECK-LABEL: 
@xvsllwil_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsllwil_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } -// CHECK-LABEL: @xvsran_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsran_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } -// CHECK-LABEL: @xvsran_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsran_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } -// CHECK-LABEL: @xvsran_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsran_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } -// CHECK-LABEL: @xvssran_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } -// CHECK-LABEL: @xvssran_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } -// CHECK-LABEL: @xvssran_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: 
store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } -// CHECK-LABEL: @xvssran_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } -// CHECK-LABEL: @xvssran_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } -// CHECK-LABEL: @xvssran_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssran_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } -// CHECK-LABEL: @xvsrarn_b_h( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } -// CHECK-LABEL: @xvsrarn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } -// CHECK-LABEL: @xvsrarn_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrarn_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) 
align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } -// CHECK-LABEL: @xvssrarn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // 
v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } -// CHECK-LABEL: @xvssrarn_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrarn_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } -// CHECK-LABEL: @xvssrarn_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } -// CHECK-LABEL: @xvssrarn_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarn_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } -// CHECK-LABEL: @xvsrln_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrln_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } -// CHECK-LABEL: @xvsrln_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrln_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } -// CHECK-LABEL: @xvsrln_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrln_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } -// CHECK-LABEL: @xvssrln_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } -// CHECK-LABEL: @xvssrln_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } -// CHECK-LABEL: @xvssrln_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } -// CHECK-LABEL: @xvsrlrn_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } -// CHECK-LABEL: @xvsrlrn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 
32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } -// CHECK-LABEL: @xvsrlrn_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return 
__builtin_lasx_xvsrlrn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrlrn_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } -// CHECK-LABEL: @xvssrlrn_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } -// CHECK-LABEL: @xvssrlrn_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } -// CHECK-LABEL: @xvfrstpi_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @xvfrstpi_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } -// CHECK-LABEL: @xvfrstpi_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrstpi_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } -// CHECK-LABEL: @xvfrstp_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrstp_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } -// CHECK-LABEL: @xvfrstp_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrstp_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } -// CHECK-LABEL: @xvshuf4i_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvshuf4i_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } -// CHECK-LABEL: @xvbsrl_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbsrl_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } -// CHECK-LABEL: @xvbsll_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvbsll_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) 
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } -// CHECK-LABEL: @xvextrins_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } -// CHECK-LABEL: @xvextrins_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } -// CHECK-LABEL: @xvextrins_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } -// CHECK-LABEL: 
@xvextrins_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextrins_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } -// CHECK-LABEL: @xvmskltz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } -// CHECK-LABEL: @xvmskltz_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } -// CHECK-LABEL: @xvmskltz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmskltz_w(v8i32 _1) { return 
__builtin_lasx_xvmskltz_w(_1); } -// CHECK-LABEL: @xvmskltz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskltz_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } -// CHECK-LABEL: @xvsigncov_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsigncov_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } -// CHECK-LABEL: @xvsigncov_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsigncov_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } -// CHECK-LABEL: @xvsigncov_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsigncov_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } -// CHECK-LABEL: @xvsigncov_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsigncov_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } -// CHECK-LABEL: @xvfmadd_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmadd_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } -// CHECK-LABEL: @xvfmadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } -// CHECK-LABEL: @xvfmsub_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmsub_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x 
float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } -// CHECK-LABEL: @xvfmsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfmsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } -// CHECK-LABEL: @xvfnmadd_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmadd_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } -// CHECK-LABEL: @xvfnmadd_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmadd_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } -// CHECK-LABEL: @xvfnmsub_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmsub_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } -// CHECK-LABEL: @xvfnmsub_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfnmsub_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return 
__builtin_lasx_xvfnmsub_d(_1, _2, _3); } -// CHECK-LABEL: @xvftintrne_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrne_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } -// CHECK-LABEL: @xvftintrne_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrne_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } -// CHECK-LABEL: @xvftintrp_w_s( 
-// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrp_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } -// CHECK-LABEL: @xvftintrp_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrp_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } -// CHECK-LABEL: @xvftintrm_w_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrm_w_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } -// CHECK-LABEL: @xvftintrm_l_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrm_l_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } -// CHECK-LABEL: @xvftint_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x 
double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftint_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } -// CHECK-LABEL: @xvffint_s_l( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffint_s_l( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x 
float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } -// CHECK-LABEL: @xvftintrz_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrz_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } -// CHECK-LABEL: @xvftintrp_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrp_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } -// CHECK-LABEL: @xvftintrm_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrm_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvftintrm_w_d(_1, _2); } -// CHECK-LABEL: @xvftintrne_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrne_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } -// CHECK-LABEL: @xvftinth_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftinth_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } -// CHECK-LABEL: @xvftintl_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintl_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } -// CHECK-LABEL: @xvffinth_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffinth_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], 
ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } -// CHECK-LABEL: @xvffintl_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvffintl_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } -// CHECK-LABEL: @xvftintrzh_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrzh_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 
xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } -// CHECK-LABEL: @xvftintrzl_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrzl_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } -// CHECK-LABEL: @xvftintrph_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrph_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } -// 
CHECK-LABEL: @xvftintrpl_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrpl_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } -// CHECK-LABEL: @xvftintrmh_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrmh_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } -// CHECK-LABEL: @xvftintrml_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrml_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } -// CHECK-LABEL: @xvftintrneh_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrneh_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } -// CHECK-LABEL: @xvftintrnel_l_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-LABEL: define dso_local void @xvftintrnel_l_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } -// CHECK-LABEL: @xvfrintrne_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrne_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } -// CHECK-LABEL: @xvfrintrne_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrne_d( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } -// CHECK-LABEL: @xvfrintrz_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrz_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } -// CHECK-LABEL: @xvfrintrz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrz_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } -// CHECK-LABEL: @xvfrintrp_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrp_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } -// CHECK-LABEL: @xvfrintrp_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrp_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } -// CHECK-LABEL: @xvfrintrm_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrm_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } -// CHECK-LABEL: @xvfrintrm_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfrintrm_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } -// CHECK-LABEL: @xvld( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvld( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } -// CHECK-LABEL: @xvst( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) +// CHECK-LABEL: define dso_local void @xvst( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2]], i32 1) // CHECK-NEXT: ret void // void xvst(v32i8 _1, void *_2) { 
return __builtin_lasx_xvst(_1, _2, 1); } -// CHECK-LABEL: @xvstelm_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_b( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2]], i32 1, i32 1) // CHECK-NEXT: ret void // void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } -// CHECK-LABEL: @xvstelm_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_h( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2]], i32 2, i32 1) // CHECK-NEXT: ret void // void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } -// CHECK-LABEL: @xvstelm_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef 
[[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2]], i32 4, i32 1) // CHECK-NEXT: ret void // void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } -// CHECK-LABEL: @xvstelm_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) +// CHECK-LABEL: define dso_local void @xvstelm_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2]], i32 8, i32 1) // CHECK-NEXT: ret void // void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } -// CHECK-LABEL: @xvinsve0_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsve0_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } -// CHECK-LABEL: @xvinsve0_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsve0_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } -// CHECK-LABEL: @xvpickve_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } -// CHECK-LABEL: @xvpickve_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } -// CHECK-LABEL: @xvssrlrn_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } -// CHECK-LABEL: @xvssrlrn_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } -// CHECK-LABEL: @xvssrlrn_w_d( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrn_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } -// CHECK-LABEL: @xvssrln_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } -// CHECK-LABEL: @xvssrln_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } -// CHECK-LABEL: @xvssrln_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrln_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x 
i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } -// CHECK-LABEL: @xvorn_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvorn_v( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvorn_v(v32i8 
_1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } -// CHECK-LABEL: @xvldi( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvldi( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvldi() { return __builtin_lasx_xvldi(1); } -// CHECK-LABEL: @xvldx( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldx( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1]], i64 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } -// CHECK-LABEL: @xvstx( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1) +// CHECK-LABEL: define dso_local void @xvstx( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr noundef [[_2:%.*]]) local_unnamed_addr #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2]], i64 1) // CHECK-NEXT: ret void // void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } -// CHECK-LABEL: @xvextl_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextl_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } -// CHECK-LABEL: @xvinsgr2vr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsgr2vr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } -// CHECK-LABEL: @xvinsgr2vr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvinsgr2vr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } -// CHECK-LABEL: @xvreplve0_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 
xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } -// CHECK-LABEL: @xvreplve0_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } -// CHECK-LABEL: @xvreplve0_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } -// CHECK-LABEL: @xvreplve0_d( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } -// CHECK-LABEL: @xvreplve0_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvreplve0_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } -// CHECK-LABEL: @vext2xv_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @vext2xv_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } -// CHECK-LABEL: @vext2xv_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } -// CHECK-LABEL: @vext2xv_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly 
sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } -// CHECK-LABEL: @vext2xv_w_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_w_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } -// CHECK-LABEL: @vext2xv_d_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_d_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } -// CHECK-LABEL: @vext2xv_d_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_d_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } -// CHECK-LABEL: @vext2xv_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } -// CHECK-LABEL: @vext2xv_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } -// CHECK-LABEL: @vext2xv_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: 
[[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } -// CHECK-LABEL: @vext2xv_wu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_wu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } -// CHECK-LABEL: @vext2xv_du_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_du_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]]) -// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } -// CHECK-LABEL: @vext2xv_du_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @vext2xv_du_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } -// CHECK-LABEL: @xvpermi_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpermi_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } -// CHECK-LABEL: @xvpermi_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpermi_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } -// CHECK-LABEL: @xvperm_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvperm_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } -// CHECK-LABEL: @xvldrepl_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldrepl_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } -// CHECK-LABEL: @xvldrepl_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldrepl_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1]], i32 2) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } -// CHECK-LABEL: @xvldrepl_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldrepl_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1]], i32 4) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } -// CHECK-LABEL: @xvldrepl_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvldrepl_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef [[_1:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1]], i32 8) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } -// CHECK-LABEL: @xvpickve2gr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = 
load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } -// CHECK-LABEL: @xvpickve2gr_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xvpickve2gr_wu( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) // CHECK-NEXT: ret i32 [[TMP1]] // unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } -// CHECK-LABEL: @xvpickve2gr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local i64 @xvpickve2gr_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } -// CHECK-LABEL: @xvpickve2gr_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local i64 @xvpickve2gr_du( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) // CHECK-NEXT: ret i64 [[TMP1]] // unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } -// CHECK-LABEL: @xvaddwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } -// CHECK-LABEL: @xvaddwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } -// CHECK-LABEL: @xvaddwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } -// CHECK-LABEL: @xvaddwev_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } -// CHECK-LABEL: @xvaddwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } -// CHECK-LABEL: @xvaddwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } -// CHECK-LABEL: @xvaddwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } -// CHECK-LABEL: @xvaddwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = 
tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } -// CHECK-LABEL: @xvsubwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } -// CHECK-LABEL: @xvsubwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } -// CHECK-LABEL: @xvsubwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 
xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } -// CHECK-LABEL: @xvsubwev_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } -// CHECK-LABEL: @xvsubwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 
32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } -// CHECK-LABEL: @xvsubwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } -// CHECK-LABEL: @xvsubwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-LABEL: define dso_local void @xvsubwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } -// CHECK-LABEL: @xvsubwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 
32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } -// CHECK-LABEL: @xvmulwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } -// CHECK-LABEL: @xvmulwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } -// CHECK-LABEL: @xvmulwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } -// CHECK-LABEL: @xvmulwev_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x 
i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } -// CHECK-LABEL: @xvmulwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } -// CHECK-LABEL: @xvmulwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } -// CHECK-LABEL: @xvmulwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } -// CHECK-LABEL: @xvmulwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 
xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } -// CHECK-LABEL: @xvaddwod_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } -// CHECK-LABEL: @xvaddwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, 
!tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } -// CHECK-LABEL: @xvaddwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } -// CHECK-LABEL: @xvaddwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvaddwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } -// CHECK-LABEL: @xvaddwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } -// CHECK-LABEL: @xvaddwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } -// CHECK-LABEL: @xvaddwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } -// CHECK-LABEL: @xvaddwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } -// CHECK-LABEL: @xvsubwod_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = 
load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } -// CHECK-LABEL: @xvsubwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } -// CHECK-LABEL: @xvsubwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } -// CHECK-LABEL: @xvsubwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) 
initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } -// CHECK-LABEL: @xvsubwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_q_du(v4u64 
_1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } -// CHECK-LABEL: @xvsubwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } -// CHECK-LABEL: @xvsubwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } -// CHECK-LABEL: @xvsubwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsubwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } -// CHECK-LABEL: @xvmulwod_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvmulwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } -// CHECK-LABEL: @xvmulwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } -// CHECK-LABEL: @xvmulwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } -// CHECK-LABEL: @xvmulwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } -// CHECK-LABEL: @xvmulwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } -// CHECK-LABEL: @xvmulwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } -// CHECK-LABEL: @xvmulwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } -// CHECK-LABEL: @xvmulwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } -// CHECK-LABEL: @xvaddwev_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvaddwev_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret 
void // v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvaddwev_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvmulwev_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] 
= load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvmulwev_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvmulwev_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] 
= load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvaddwod_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) 
-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvaddwod_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvaddwod_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvmulwod_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return 
__builtin_lasx_xvmulwod_d_wu_w(_1, _2); } -// CHECK-LABEL: @xvmulwod_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } -// CHECK-LABEL: @xvmulwod_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } -// CHECK-LABEL: @xvhaddw_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhaddw_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } -// CHECK-LABEL: @xvhaddw_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvhaddw_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } -// CHECK-LABEL: @xvhsubw_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } -// CHECK-LABEL: @xvhsubw_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvhsubw_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } -// CHECK-LABEL: @xvmaddwev_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_h_b( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_q_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr 
[[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return 
__builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable 
writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_d_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], 
align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 
xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_q_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_q_du( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_d_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_w_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_h_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvmaddwev_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = 
load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwev_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwev_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// 
CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_d_wu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_d_wu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_w_hu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_w_hu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) 
-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } -// CHECK-LABEL: @xvmaddwod_h_bu_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmaddwod_h_bu_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } -// CHECK-LABEL: @xvrotr_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// 
CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } -// CHECK-LABEL: @xvrotr_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -// CHECK-NEXT: store 
<16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } -// CHECK-LABEL: @xvrotr_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } -// CHECK-LABEL: @xvrotr_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotr_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly 
captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } -// CHECK-LABEL: @xvadd_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvadd_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } -// CHECK-LABEL: @xvsub_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsub_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } -// CHECK-LABEL: @xvaddwev_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwev_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> 
[[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } -// CHECK-LABEL: @xvaddwod_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvaddwod_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } -// CHECK-LABEL: @xvmulwev_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwev_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } -// CHECK-LABEL: @xvmulwod_q_du_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmulwod_q_du_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return 
__builtin_lasx_xvmulwod_q_du_d(_1, _2); } -// CHECK-LABEL: @xvmskgez_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmskgez_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } -// CHECK-LABEL: @xvmsknz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvmsknz_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } -// CHECK-LABEL: @xvexth_h_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x 
i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_h_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } -// CHECK-LABEL: @xvexth_w_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_w_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } -// CHECK-LABEL: @xvexth_d_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_d_w( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } -// CHECK-LABEL: @xvexth_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } -// CHECK-LABEL: @xvexth_hu_bu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_hu_bu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], 
ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]]) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } -// CHECK-LABEL: @xvexth_wu_hu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_wu_hu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]]) -// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } -// CHECK-LABEL: @xvexth_du_wu( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_du_wu( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } -// CHECK-LABEL: @xvexth_qu_du( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvexth_qu_du( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } -// CHECK-LABEL: @xvrotri_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotri_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] 
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } -// CHECK-LABEL: @xvrotri_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotri_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } -// CHECK-LABEL: @xvrotri_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotri_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) -// 
CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } -// CHECK-LABEL: @xvrotri_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvrotri_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } -// CHECK-LABEL: @xvextl_q_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvextl_q_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) -// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> 
[[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } -// CHECK-LABEL: @xvsrlni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrlni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrlni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrlni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> 
[[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrlrni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrlrni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return 
__builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvssrlni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 
32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return 
noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlni_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlni_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_b_h( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa 
[[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_d_q( +// CHECK-SAME: ptr 
dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) 
local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrlrni_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrlrni_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] 
= load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrani_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrani_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrani_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrani_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrani_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 
captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // 
v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x 
i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvsrarni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvsrarni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store 
<16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } -// 
CHECK-LABEL: @xvssrani_hu_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x 
i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrani_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrani_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_b_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@xvssrarni_b_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_h_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_h_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_w_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_w_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_d_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_d_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_bu_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_bu_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_hu_w( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_hu_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_wu_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_wu_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] 
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } -// CHECK-LABEL: @xvssrarni_du_q( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvssrarni_du_q( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } -// CHECK-LABEL: @xbnz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_b( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } -// CHECK-LABEL: @xbnz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } -// CHECK-LABEL: @xbnz_h( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_h( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } -// CHECK-LABEL: @xbnz_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_v( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 
@llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } -// CHECK-LABEL: @xbnz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbnz_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } -// CHECK-LABEL: @xbz_b( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_b( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } -// CHECK-LABEL: @xbz_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_d( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } -// CHECK-LABEL: @xbz_h( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_h( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } -// CHECK-LABEL: @xbz_v( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_v( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } -// CHECK-LABEL: @xbz_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local signext i32 @xbz_w( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) // CHECK-NEXT: ret i32 [[TMP1]] // int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } -// CHECK-LABEL: @xvfcmp_caf_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_caf_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_caf_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_caf_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 
x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_ceq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_ceq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_ceq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_ceq_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) 
[[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cle_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cle_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } -// 
CHECK-LABEL: @xvfcmp_cle_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cle_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_clt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_clt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, 
ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_clt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_clt_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cne_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cne_d( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cne_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cne_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cor_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cor_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cor_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cor_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cueq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cueq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cueq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cueq_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cule_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cule_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x 
i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cule_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cule_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cult_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cult_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 
32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cult_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cult_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // 
CHECK-NEXT: ret void // v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cun_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cun_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cune_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cune_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_cune_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cune_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_cun_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: 
[[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_cun_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_saf_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_saf_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x 
double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_saf_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_saf_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_seq_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_seq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr 
dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_seq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_seq_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfcmp_seq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sle_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sle_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sle_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sle_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_slt_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_slt_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_slt_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-LABEL: define dso_local void @xvfcmp_slt_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sne_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sne_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sne_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sne_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sor_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sor_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef 
readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sor_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sor_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sueq_d( -// CHECK-NEXT: entry: 
-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sueq_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sueq_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sueq_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] 
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sule_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sule_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sule_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sule_s( +// CHECK-SAME: ptr dead_on_unwind 
noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sult_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sult_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sult_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sult_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sun_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sun_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { 
+// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sune_d( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sune_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } -// CHECK-LABEL: @xvfcmp_sune_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sune_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } -// CHECK-LABEL: @xvfcmp_sun_s( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvfcmp_sun_s( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } -// CHECK-LABEL: @xvpickve_d_f( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_d_f( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) -// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } -// CHECK-LABEL: @xvpickve_w_f( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @xvpickve_w_f( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) -// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } -// CHECK-LABEL: @xvrepli_b( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_b( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<32 x i8>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } -// CHECK-LABEL: @xvrepli_d( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_d( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } -// CHECK-LABEL: @xvrepli_h( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_h( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i16>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } -// CHECK-LABEL: @xvrepli_w( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @xvrepli_w( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +//. +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c index cdbfdd6b7975a..59b71cd355813 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ // RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE // RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \ @@ -6,20 +6,23 @@ // RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ // RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT -// CHECK-LE-LABEL: @test1( -// CHECK-LE-NEXT: entry: -// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]]) -// CHECK-LE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LE-LABEL: define dso_local void @test1( +// CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-LE-NEXT: [[ENTRY:.*:]] +// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4]], <16 x i8> [[VC3]], <16 x i8> [[VC2]], <16 x i8> [[VC1]]) +// CHECK-LE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]] // CHECK-LE-NEXT: ret void // -// CHECK-BE-LABEL: @test1( -// CHECK-BE-NEXT: entry: -// 
CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC4:%.*]]) -// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]] +// CHECK-BE-LABEL: define dso_local void @test1( +// CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-BE-NEXT: [[ENTRY:.*:]] +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1]], <16 x i8> [[VC2]], <16 x i8> [[VC3]], <16 x i8> [[VC4]]) +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]] // CHECK-BE-NEXT: ret void // -// CHECK-LE-NOOPT-LABEL: @test1( -// CHECK-LE-NOOPT-NEXT: entry: +// CHECK-LE-NOOPT-LABEL: define dso_local void @test1( +// CHECK-LE-NOOPT-SAME: ptr noundef [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef [[RESP:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LE-NOOPT-NEXT: [[ENTRY:.*:]] // CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8 // CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8 // CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16 @@ -30,13 +33,13 @@ // CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64 // CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32 // CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <512 x i1>, align 64 -// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8 -// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8 -// CHECK-LE-NOOPT-NEXT: store <16 
x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16 -// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16 -// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3:%.*]], ptr [[VC3_ADDR]], align 16 -// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4:%.*]], ptr [[VC4_ADDR]], align 16 -// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store ptr [[VQP]], ptr [[VQP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store ptr [[VPP]], ptr [[VPP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1]], ptr [[VC1_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2]], ptr [[VC2_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3]], ptr [[VC3_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4]], ptr [[VC4_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store ptr [[RESP]], ptr [[RESP_ADDR]], align 8 // CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8 // CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64 // CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64 @@ -63,20 +66,23 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec *((__vector_quad *)resp) = res; } -// CHECK-LE-LABEL: @test2( -// CHECK-LE-NEXT: entry: -// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]]) -// CHECK-LE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]] +// CHECK-LE-LABEL: define dso_local void @test2( +// CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-LE-NEXT: [[ENTRY:.*:]] +// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2]], <16 
x i8> [[VC1]]) +// CHECK-LE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] // CHECK-LE-NEXT: ret void // -// CHECK-BE-LABEL: @test2( -// CHECK-BE-NEXT: entry: -// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]]) -// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]] +// CHECK-BE-LABEL: define dso_local void @test2( +// CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-BE-NEXT: [[ENTRY:.*:]] +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1]], <16 x i8> [[VC2]]) +// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] // CHECK-BE-NEXT: ret void // -// CHECK-LE-NOOPT-LABEL: @test2( -// CHECK-LE-NOOPT-NEXT: entry: +// CHECK-LE-NOOPT-LABEL: define dso_local void @test2( +// CHECK-LE-NOOPT-SAME: ptr noundef [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef [[RESP:%.*]]) #[[ATTR0]] { +// CHECK-LE-NOOPT-NEXT: [[ENTRY:.*:]] // CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8 // CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8 // CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16 @@ -85,11 +91,11 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec // CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64 // CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32 // CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <256 x i1>, align 32 -// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8 -// CHECK-LE-NOOPT-NEXT: store ptr 
[[VPP:%.*]], ptr [[VPP_ADDR]], align 8 -// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16 -// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16 -// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store ptr [[VQP]], ptr [[VQP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store ptr [[VPP]], ptr [[VPP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1]], ptr [[VC1_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2]], ptr [[VC2_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store ptr [[RESP]], ptr [[RESP_ADDR]], align 8 // CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8 // CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64 // CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64 @@ -113,3 +119,18 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, __builtin_vsx_build_pair(&res, vc1, vc2); *((__vector_pair *)resp) = res; } +//. +// CHECK-LE: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-LE: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0} +// CHECK-LE: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-LE: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-LE: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-LE: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +//. +// CHECK-BE: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-BE: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0} +// CHECK-BE: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-BE: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK-BE: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-BE: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +//. 
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c index c66f5e2a32919..f62656757c8c5 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c @@ -1,17 +1,26 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \ // RUN: -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu future \ -// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX -// CHECK-LABEL: @test_dmxvi8gerx4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]] +// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4( +// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6:![0-9]+]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvi8gerx4( +// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> 
noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2:![0-9]+]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6:![0-9]+]] +// AIX-NEXT: ret void +// void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -19,13 +28,22 @@ void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: @test_pmdmxvi8gerx4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4( +// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvi8gerx4( +// AIX-SAME: ptr 
noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: ret void +// void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -33,14 +51,24 @@ void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigne *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: @test_dmxvi8gerx4pp( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4pp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvi8gerx4pp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: ret void +// void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -48,14 +76,24 @@ void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigne *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: @test_pmdmxvi8gerx4pp( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4pp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], 
ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvi8gerx4pp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: ret void +// void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -63,14 +101,24 @@ void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsig *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: @test_dmxvi8gerx4spp( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa 
[[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4spp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmxvi8gerx4spp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: ret void +// void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, 
vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -78,14 +126,24 @@ void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsign *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: @test_pmdmxvi8gerx4spp( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4spp( +// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_pmdmxvi8gerx4spp( +// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], 
align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA2]] +// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA6]] +// AIX-NEXT: ret void +// void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __dmr1024 vdmr = *((__dmr1024 *)vdmrp); __vector_pair vp = *((__vector_pair *)vpp); @@ -93,17 +151,30 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi *((__dmr1024 *)resp) = vdmr; } -// CHECK-LABEL: @test_dmf_basic( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @test_dmf_basic( +// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef captures(none) [[RES2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz() // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1:%.*]], align 128 -// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2:%.*]], align 128 -// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128 +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1]], align 128 +// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2]], align 128 +// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P]], align 128 // CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]]) // CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128 // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmf_basic( +// AIX-SAME: ptr noundef readonly 
captures(none) [[P:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef captures(none) [[RES2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz() +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]]) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1]], align 128 +// AIX-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2]], align 128 +// AIX-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P]], align 128 +// AIX-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]]) +// AIX-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128 +// AIX-NEXT: ret void +// void test_dmf_basic(char *p, char *res1, char *res2) { __dmr1024 x[2]; __builtin_mma_dmsetdmrz(&x[0]); @@ -111,18 +182,46 @@ void test_dmf_basic(char *p, char *res1, char *res2) { __builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p); } -// CHECK-LABEL: @test_dmf_basic2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V:%.*]], align 16, !tbaa [[TBAA8:![0-9]+]] +// CHECK-LABEL: define dso_local void @test_dmf_basic2( +// CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]]) -// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2:%.*]], align 128 -// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x 
i1>, ptr [[P1:%.*]], align 128 -// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1:%.*]], align 128 +// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128 +// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128 +// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1]], align 128 // CHECK-NEXT: ret void // +// AIX-LABEL: define void @test_dmf_basic2( +// AIX-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] +// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]]) +// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128 +// AIX-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128 +// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1]], align 128 +// AIX-NEXT: ret void +// void test_dmf_basic2(char *p1, char *res1, char *res2, vector unsigned char *v) { vector unsigned char vv = *v; __builtin_mma_build_dmr((__dmr1024*)res2, vv, vv, vv, vv, vv, vv, vv, vv); __builtin_mma_disassemble_dmr(res1, (__dmr1024*)p1); } +//. +// CHECK: [[__VECTOR_PAIR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__DMR1024_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__dmr1024", [[META4]], i64 0} +// CHECK: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0} +//. 
+// AIX: [[__VECTOR_PAIR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// AIX: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0} +// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// AIX: [[META5]] = !{!"Simple C/C++ TBAA"} +// AIX: [[__DMR1024_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// AIX: [[META7]] = !{!"__dmr1024", [[META4]], i64 0} +// AIX: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0} +//. diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c index 08ff936a0a797..5c7b222cb618e 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c @@ -1,13 +1,14 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ // RUN: -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \ // RUN: -emit-llvm %s -o - | FileCheck %s -// CHECK-LABEL: @test1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @test1( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 
64, !tbaa [[__VECTOR_QUAD_TBAA2:![0-9]+]] // CHECK-NEXT: ret void // void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -18,12 +19,13 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi *((__vector_quad *)resp) = res; } -// CHECK-LABEL: @test2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64 +// CHECK-LABEL: define dso_local void @test2( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 0 -// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 1 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16 // CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16 @@ -39,10 +41,11 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp); } -// CHECK-LABEL: @test3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]] +// CHECK-LABEL: define dso_local void @test3( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef 
readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]] // CHECK-NEXT: ret void // void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -53,12 +56,13 @@ void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi *((__vector_pair *)resp) = res; } -// CHECK-LABEL: @test4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32 +// CHECK-LABEL: define dso_local void @test4( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0 -// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16 // CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16 @@ -68,11 +72,12 @@ void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp); } -// CHECK-LABEL: @test5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = 
load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test5( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP0]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -82,11 +87,12 @@ void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test6( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test6( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP0]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ 
-96,10 +102,11 @@ void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test7( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @test7( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -109,10 +116,11 @@ void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test8( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -122,10 +130,11 @@ void test8(unsigned char *vqp, unsigned 
char *vpp, vector unsigned char vc, unsi *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test9( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test9( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -135,10 +144,11 @@ void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test10( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test10( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test10(unsigned char *vqp, unsigned char *vpp, vector 
unsigned char vc, unsigned char *resp) { @@ -148,10 +158,11 @@ void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test11( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test11( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -161,10 +172,11 @@ void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test12( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test12( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa 
[[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -174,10 +186,11 @@ void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test13( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test13( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -187,11 +200,12 @@ void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test14( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test14( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -201,10 +215,11 @@ void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test15( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test15( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -214,10 +229,11 @@ void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void 
@test16( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -227,10 +243,11 @@ void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test17( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test17( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -240,10 +257,11 @@ void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test18( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> 
@llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test18( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -253,10 +271,11 @@ void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test19( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test19( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -266,10 +285,11 @@ void test19(unsigned 
char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test20( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test20( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -279,11 +299,12 @@ void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test21( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test21( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = tail 
call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -293,11 +314,12 @@ void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test22( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test22( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -307,11 +329,12 @@ void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test23( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], 
<16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test23( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -321,11 +344,12 @@ void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test24( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test24( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], 
ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -335,11 +359,12 @@ void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test25( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test25( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -349,11 +374,12 @@ void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test26( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @test26( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -363,11 +389,12 @@ void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test27( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test27( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void 
test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -377,11 +404,12 @@ void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test28( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test28( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -391,11 +419,12 @@ void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test29( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test29( 
+// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -405,11 +434,12 @@ void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test30( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test30( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void 
test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -419,11 +449,12 @@ void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test31( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test31( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -433,11 +464,12 @@ void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test32( +// CHECK-SAME: 
ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -447,11 +479,12 @@ void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test33( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test33( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned 
char *resp) { @@ -461,11 +494,12 @@ void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test34( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test34( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -475,11 +509,12 @@ void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test35( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test35( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr 
noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -489,11 +524,12 @@ void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test36( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test36( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -503,11 +539,12 @@ void test36(unsigned char *vqp, unsigned char *vpp, 
vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test37( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test37( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -517,11 +554,12 @@ void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test38( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test38( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly 
captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -531,11 +569,12 @@ void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test39( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test39( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -545,11 +584,12 @@ void test39(unsigned char *vqp, unsigned char 
*vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test40( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test40( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -559,11 +599,12 @@ void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test41( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test41( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) 
local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -573,11 +614,12 @@ void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test42( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test42( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -587,11 +629,12 @@ void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test43( -// CHECK-NEXT: entry: -// 
CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test43( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -601,11 +644,12 @@ void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test44( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test44( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr 
[[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -615,11 +659,12 @@ void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test45( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test45( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -629,11 +674,12 @@ void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test46( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 
64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test46( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -643,11 +689,12 @@ void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test47( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test47( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa 
[[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -657,12 +704,13 @@ void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test48( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test48( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -672,12 +720,13 @@ void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns 
*((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test49( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test49( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -687,12 +736,13 @@ void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test50( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @test50( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -702,12 +752,13 @@ void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test51( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test51( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -717,12 +768,13 @@ void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test52( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test52( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -732,12 +784,13 @@ void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad 
*)resp) = vq; } -// CHECK-LABEL: @test53( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test53( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -747,12 +800,13 @@ void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test54( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa 
[[TBAA2]] +// CHECK-LABEL: define dso_local void @test54( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -762,12 +816,13 @@ void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test55( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test55( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x 
i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -777,10 +832,11 @@ void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test56( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test56( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -790,10 +846,11 @@ void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test57( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test57( +// CHECK-SAME: ptr noundef readnone captures(none) 
[[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -803,11 +860,12 @@ void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test58( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test58( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -817,11 +875,12 @@ void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad 
*)resp) = vq; } -// CHECK-LABEL: @test59( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test59( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -831,11 +890,12 @@ void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test60( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test60( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] 
+// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -845,11 +905,12 @@ void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test61( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test61( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -859,11 +920,12 @@ void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test62( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr 
[[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test62( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -873,11 +935,12 @@ void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test63( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test63( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, 
ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -887,11 +950,12 @@ void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test64( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -901,11 +965,12 @@ void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test65( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x 
i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test65( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -915,10 +980,11 @@ void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test66( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]]) -// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]]) +// CHECK-LABEL: define dso_local void @test66( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]]) +// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2]]) // CHECK-NEXT: ret void // void test66(const __vector_pair *vpp, __vector_pair *vp2) { @@ -926,11 +992,12 @@ void test66(const __vector_pair *vpp, __vector_pair *vp2) { 
__builtin_vsx_stxvp(vp, 0L, vp2); } -// CHECK-LABEL: @test67( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]] +// CHECK-LABEL: define dso_local void @test67( +// CHECK-SAME: ptr noundef [[VPP:%.*]], i64 noundef [[OFFSET:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFSET]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 [[OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 [[OFFSET]] // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -939,11 +1006,12 @@ void test67(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) { __builtin_vsx_stxvp(vp, offset, vp2); } -// CHECK-LABEL: @test68( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18 +// CHECK-LABEL: define dso_local void @test68( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 18 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 18 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 18 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -952,11 +1020,12 @@ void test68(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_vsx_stxvp(vp, 18L, vp2); } -// CHECK-LABEL: @test69( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1 +// CHECK-LABEL: define dso_local void @test69( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef 
[[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 1 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 1 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -965,11 +1034,12 @@ void test69(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_vsx_stxvp(vp, 1L, vp2); } -// CHECK-LABEL: @test70( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42 +// CHECK-LABEL: define dso_local void @test70( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 42 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 42 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 42 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -978,11 +1048,12 @@ void test70(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_vsx_stxvp(vp, 42L, vp2); } -// CHECK-LABEL: @test71( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768 +// CHECK-LABEL: define dso_local void @test71( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32768 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32768 +// CHECK-NEXT: [[TMP2:%.*]] = 
getelementptr i8, ptr [[VP2]], i64 32768 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -991,11 +1062,12 @@ void test71(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_vsx_stxvp(vp, 32768L, vp2); } -// CHECK-LABEL: @test72( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799 +// CHECK-LABEL: define dso_local void @test72( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32799 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32799 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 32799 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -1004,13 +1076,14 @@ void test72(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_vsx_stxvp(vp, 32799L, vp2); } -// CHECK-LABEL: @test73( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8 +// CHECK-LABEL: define dso_local void @test73( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 8 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> 
[[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1020,12 +1093,13 @@ void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test74( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test74( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1035,13 +1109,14 @@ void 
test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test75( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]] +// CHECK-LABEL: define dso_local void @test75( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], i64 noundef [[OFFS:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFS]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1051,10 +1126,11 @@ void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vect *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test76( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) -// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @test76( +// CHECK-SAME: ptr 
noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]] // CHECK-NEXT: ret void // void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1065,12 +1141,13 @@ void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns *((__vector_pair *)resp) = res; } -// CHECK-LABEL: @test77( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32 +// CHECK-LABEL: define dso_local void @test77( +// CHECK-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0 -// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16 // CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16 @@ -1080,10 +1157,11 @@ void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp); } -// CHECK-LABEL: 
@test78( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]]) -// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]]) +// CHECK-LABEL: define dso_local void @test78( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]]) +// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2]]) // CHECK-NEXT: ret void // void test78(const __vector_pair *vpp, __vector_pair *vp2) { @@ -1091,11 +1169,12 @@ void test78(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_mma_stxvp(vp, 0L, vp2); } -// CHECK-LABEL: @test79( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]] +// CHECK-LABEL: define dso_local void @test79( +// CHECK-SAME: ptr noundef [[VPP:%.*]], i64 noundef [[OFFSET:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFSET]] // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 [[OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 [[OFFSET]] // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -1104,11 +1183,12 @@ void test79(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) { __builtin_mma_stxvp(vp, offset, vp2); } -// CHECK-LABEL: @test80( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18 +// CHECK-LABEL: define dso_local void @test80( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 18 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 18 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 18 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -1117,11 +1197,12 @@ void test80(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_mma_stxvp(vp, 18L, vp2); } -// CHECK-LABEL: @test81( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1 +// CHECK-LABEL: define dso_local void @test81( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 1 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 1 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -1130,11 +1211,12 @@ void test81(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_mma_stxvp(vp, 1L, vp2); } -// CHECK-LABEL: @test82( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42 +// CHECK-LABEL: define dso_local void @test82( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 42 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 42 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 42 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], 
ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -1143,11 +1225,12 @@ void test82(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_mma_stxvp(vp, 42L, vp2); } -// CHECK-LABEL: @test83( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768 +// CHECK-LABEL: define dso_local void @test83( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32768 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32768 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 32768 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -1156,11 +1239,12 @@ void test83(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_mma_stxvp(vp, 32768L, vp2); } -// CHECK-LABEL: @test84( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799 +// CHECK-LABEL: define dso_local void @test84( +// CHECK-SAME: ptr noundef [[VPP:%.*]], ptr noundef [[VP2:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP]], i64 32799 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32799 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2]], i64 32799 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]]) // CHECK-NEXT: ret void // @@ -1169,13 +1253,14 @@ void test84(const __vector_pair *vpp, __vector_pair *vp2) { __builtin_mma_stxvp(vp, 32799L, vp2); } -// CHECK-LABEL: @test85( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: 
[[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8 +// CHECK-LABEL: define dso_local void @test85( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 8 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1185,12 +1270,13 @@ void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test86( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @test86( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], 
ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]]) +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1200,13 +1286,14 @@ void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v *((__vector_quad *)resp) = vq; } -// CHECK-LABEL: @test87( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]] +// CHECK-LABEL: define dso_local void @test87( +// CHECK-SAME: ptr noundef readonly captures(none) [[VQP:%.*]], i64 noundef [[OFFS:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP]], i64 [[OFFS]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]]) -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC]]) 
+// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA2]] // CHECK-NEXT: ret void // void test87(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { @@ -1215,3 +1302,11 @@ void test87(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vect __builtin_mma_xvf64gernp(&vq, vp, vc); *((__vector_quad *)resp) = vq; } +//. +// CHECK: [[__VECTOR_QUAD_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"__vector_quad", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"__vector_pair", [[META4]], i64 0} +//. diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c index 45a099dc9c678..1f0b3d4a560e7 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 // REQUIRES: riscv-registered-target @@ -53,10 +53,11 @@ DEFINE_STRUCT(bool64) // bool //===----------------------------------------------------------------------===// -// CHECK-128-LABEL: @read_bool32( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-LABEL: define 
dso_local @read_bool32( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP1]], i64 0) @@ -66,23 +67,25 @@ vbool32_t read_bool32(struct struct_bool32 *s) { return s->y[0]; } -// CHECK-128-LABEL: @write_bool32( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv2i1( zeroinitializer, [[X:%.*]], i64 0) +// CHECK-128-LABEL: define dso_local void @write_bool32( +// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((1, 2)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv2i1( zeroinitializer, [[X]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 -// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 +// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // void write_bool32(struct struct_bool32 *s, vbool32_t x) { s->y[0] = x; } -// CHECK-128-LABEL: @read_bool64( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 -// 
CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[TBAA6]] +// CHECK-128-LABEL: define dso_local @read_bool64( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv1i1.nxv8i1( [[TMP1]], i64 0) @@ -92,15 +95,21 @@ vbool64_t read_bool64(struct struct_bool64 *s) { return s->y[0]; } -// CHECK-128-LABEL: @write_bool64( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv1i1( zeroinitializer, [[X:%.*]], i64 0) +// CHECK-128-LABEL: define dso_local void @write_bool64( +// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((1, 2)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv1i1( zeroinitializer, [[X]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 -// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 1 +// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // void write_bool64(struct struct_bool64 *s, vbool64_t x) { s->y[0] = x; } +//. 
+// CHECK-128: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-128: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// CHECK-128: [[META8]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c index ecde52eb3d762..b92e6dff31748 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 @@ -67,24 +67,27 @@ DEFINE_STRUCT(bool64) // int64 //===----------------------------------------------------------------------===// -// CHECK-64-LABEL: @read_int64m1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-64-LABEL: define dso_local @read_int64m1( +// CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr 
[[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v1i64( poison, <1 x i64> [[TMP0]], i64 0) // CHECK-64-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-128-LABEL: @read_int64m1( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-LABEL: define dso_local @read_int64m1( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v2i64( poison, <2 x i64> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-256-LABEL: @read_int64m1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-256-LABEL: define dso_local @read_int64m1( +// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v4i64( poison, <4 x i64> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -92,25 +95,28 @@ vint64m1_t read_int64m1(struct struct_int64m1 *s) { return s->y[0]; } -// CHECK-64-LABEL: @write_int64m1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> 
@llvm.vector.extract.v1i64.nxv1i64( [[X:%.*]], i64 0) -// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-64-LABEL: define dso_local void @write_int64m1( +// CHECK-64-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64( [[X]], i64 0) +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: ret void // -// CHECK-128-LABEL: @write_int64m1( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv1i64( [[X:%.*]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-128-LABEL: define dso_local void @write_int64m1( +// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv1i64( [[X]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // -// CHECK-256-LABEL: @write_int64m1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[X:%.*]], i64 0) -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 
8, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local void @write_int64m1( +// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[X]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { @@ -121,24 +127,27 @@ void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { // float64 //===----------------------------------------------------------------------===// -// CHECK-64-LABEL: @read_float64m1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-64-LABEL: define dso_local @read_float64m1( +// CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v1f64( poison, <1 x double> [[TMP0]], i64 0) // CHECK-64-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-128-LABEL: @read_float64m1( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-128-LABEL: define dso_local @read_float64m1( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// 
CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v2f64( poison, <2 x double> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-256-LABEL: @read_float64m1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local @read_float64m1( +// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v4f64( poison, <4 x double> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // @@ -146,25 +155,28 @@ vfloat64m1_t read_float64m1(struct struct_float64m1 *s) { return s->y[0]; } -// CHECK-64-LABEL: @write_float64m1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x double> @llvm.vector.extract.v1f64.nxv1f64( [[X:%.*]], i64 0) -// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-64-NEXT: store <1 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-64-LABEL: define dso_local void @write_float64m1( +// CHECK-64-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x double> @llvm.vector.extract.v1f64.nxv1f64( [[X]], i64 0) +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// 
CHECK-64-NEXT: store <1 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: ret void // -// CHECK-128-LABEL: @write_float64m1( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv1f64( [[X:%.*]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-128-LABEL: define dso_local void @write_float64m1( +// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv1f64( [[X]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // -// CHECK-256-LABEL: @write_float64m1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[X:%.*]], i64 0) -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local void @write_float64m1( +// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[X]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // void write_float64m1(struct struct_float64m1 *s, 
vfloat64m1_t x) { @@ -175,26 +187,29 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { // bool //===----------------------------------------------------------------------===// -// CHECK-64-LABEL: @read_bool1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-64-LABEL: define dso_local @read_bool1( +// CHECK-64-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] // -// CHECK-128-LABEL: @read_bool1( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-128-LABEL: define dso_local @read_bool1( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v16i8( poison, <16 x i8> [[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: ret [[TMP1]] // -// CHECK-256-LABEL: @read_bool1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr 
[[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local @read_bool1( +// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v32i8( poison, <32 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] @@ -203,30 +218,46 @@ vbool1_t read_bool1(struct struct_bool1 *s) { return s->y[0]; } -// CHECK-64-LABEL: @write_bool1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-64-LABEL: define dso_local void @write_bool1( +// CHECK-64-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8( [[TMP0]], i64 0) -// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: ret void // -// CHECK-128-LABEL: @write_bool1( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-128-LABEL: define dso_local void @write_bool1( +// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call 
<16 x i8> @llvm.vector.extract.v16i8.nxv8i8( [[TMP0]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-128-NEXT: ret void // -// CHECK-256-LABEL: @write_bool1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-256-LABEL: define dso_local void @write_bool1( +// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[TMP0]], i64 0) -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA6]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // void write_bool1(struct struct_bool1 *s, vbool1_t x) { s->y[0] = x; } +//. +// CHECK-64: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-64: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// CHECK-64: [[META8]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-128: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-128: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// CHECK-128: [[META8]] = !{!"Simple C/C++ TBAA"} +//. 
+// CHECK-256: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-256: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// CHECK-256: [[META8]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c index 0a50e41dda7e1..4517b52aefdfd 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s // REQUIRES: riscv-registered-target @@ -31,89 +31,100 @@ typedef vbool1_t fixed_bool1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fi typedef vbool4_t fixed_bool4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen/4))); typedef vbool32_t fixed_bool32_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen/32))); -// CHECK-LABEL: @to_vint32m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// CHECK-LABEL: define dso_local noundef @to_vint32m1_t( +// CHECK-SAME: noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE_COERCE]] // vint32m1_t to_vint32m1_t(fixed_int32m1_t type) { return type; } -// CHECK-LABEL: @from_vint32m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_vint32m1_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_int32m1_t from_vint32m1_t(vint32m1_t type) { return type; } -// CHECK-LABEL: @to_vfloat64m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret 
[[TYPE_COERCE:%.*]] +// CHECK-LABEL: define dso_local noundef @to_vfloat64m1_t( +// CHECK-SAME: noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE_COERCE]] // vfloat64m1_t to_vfloat64m1_t(fixed_float64m1_t type) { return type; } -// CHECK-LABEL: @from_vfloat64m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_vfloat64m1_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) { return type; } -// CHECK-LABEL: @from_vbool1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_vbool1_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_bool1_t from_vbool1_t(vbool1_t type) { return type; } -// CHECK-LABEL: @to_vbool1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TMP0:%.*]] +// CHECK-LABEL: define dso_local noundef @to_vbool1_t( +// CHECK-SAME: noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TMP0]] // vbool1_t to_vbool1_t(fixed_bool1_t type) { return type; } -// CHECK-LABEL: @from_vbool4_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_vbool4_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_bool4_t from_vbool4_t(vbool4_t type) { return type; } -// CHECK-LABEL: @to_vbool4_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TMP0:%.*]] +// CHECK-LABEL: define dso_local noundef @to_vbool4_t( +// CHECK-SAME: noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TMP0]] // vbool4_t to_vbool4_t(fixed_bool4_t type) { return 
type; } -// CHECK-LABEL: @from_vbool32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_vbool32_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_bool32_t from_vbool32_t(vbool32_t type) { return type; } -// CHECK-LABEL: @to_vbool32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TMP0:%.*]] +// CHECK-LABEL: define dso_local noundef @to_vbool32_t( +// CHECK-SAME: noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TMP0]] // vbool32_t to_vbool32_t(fixed_bool32_t type) { return type; } -// CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]] +// CHECK-LABEL: define dso_local @to_vint32m1_t__from_gnu_int32m1_t( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -121,19 +132,21 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { return type; } -// CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE:%.*]], i64 0) -// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @from_vint32m1_t__to_gnu_int32m1_t( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], [[TYPE:%.*]]) local_unnamed_addr 
#[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE]], i64 0) +// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { return type; } -// CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local @to_fixed_int32m1_t__from_gnu_int32m1_t( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -141,12 +154,18 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { return type; } -// CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0) -// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA6]] +// CHECK-LABEL: define dso_local void @from_fixed_int32m1_t__to_gnu_int32m1_t( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x i32>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE]], i64 0) +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]] // CHECK-NEXT: ret void // gnu_int32m1_t 
from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) { return type; } +//. +// CHECK: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// CHECK: [[META8]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c index 92ba27fb65425..f3b91b23a73e4 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 @@ -40,59 +40,66 @@ fixed_bool32_t global_bool32; // WRITES //===----------------------------------------------------------------------===// -// CHECK-64-LABEL: @write_global_i64( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64( [[V:%.*]], i64 0) -// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-64-LABEL: define dso_local void @write_global_i64( +// CHECK-64-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64( [[V]], i64 0) +// CHECK-64-NEXT: store <1 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-64-NEXT: ret void 
// -// CHECK-256-LABEL: @write_global_i64( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[V:%.*]], i64 0) -// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-256-LABEL: define dso_local void @write_global_i64( +// CHECK-256-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[V]], i64 0) +// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6:![0-9]+]] // CHECK-256-NEXT: ret void // void write_global_i64(vint64m1_t v) { global_i64 = v; } -// CHECK-64-LABEL: @write_global_bool1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to +// CHECK-64-LABEL: define dso_local void @write_global_bool1( +// CHECK-64-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8( [[TMP0]], i64 0) -// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA6]] +// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: ret void // -// CHECK-256-LABEL: @write_global_bool1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to +// CHECK-256-LABEL: define dso_local void @write_global_bool1( +// CHECK-256-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[TMP0]], i64 0) -// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA6]] +// CHECK-256-NEXT: store <32 x 
i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // void write_global_bool1(vbool1_t v) { global_bool1 = v; } -// CHECK-64-LABEL: @write_global_bool4( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to +// CHECK-64-LABEL: define dso_local void @write_global_bool4( +// CHECK-64-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-64-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool4, align 2, !tbaa [[TBAA6]] +// CHECK-64-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: ret void // -// CHECK-256-LABEL: @write_global_bool4( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to +// CHECK-256-LABEL: define dso_local void @write_global_bool4( +// CHECK-256-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[V]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-256-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool4, align 8, !tbaa [[TBAA6]] +// CHECK-256-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // void write_global_bool4(vbool4_t v) { global_bool4 = v; } #if __riscv_v_fixed_vlen >= 256 -// CHECK-256-LABEL: @write_global_bool32( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv2i1( zeroinitializer, [[V:%.*]], i64 0) +// CHECK-256-LABEL: define dso_local void @write_global_bool32( +// CHECK-256-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.insert.nxv8i1.nxv2i1( zeroinitializer, [[V]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) -// CHECK-256-NEXT: store <1 x i8> [[CAST_FIXED]], ptr @global_bool32, align 1, !tbaa [[TBAA6]] +// CHECK-256-NEXT: store <1 x i8> [[CAST_FIXED]], ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: ret void // void write_global_bool32(vbool32_t v) { global_bool32 = v; } @@ -102,46 +109,52 @@ void write_global_bool32(vbool32_t v) { global_bool32 = v; } // READS //===----------------------------------------------------------------------===// -// CHECK-64-LABEL: @read_global_i64( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA6]] +// CHECK-64-LABEL: define dso_local @read_global_i64( +// CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v1i64( poison, <1 x i64> [[TMP0]], i64 0) // CHECK-64-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-256-LABEL: @read_global_i64( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local @read_global_i64( +// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v4i64( poison, <4 x i64> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // vint64m1_t read_global_i64() { return global_i64; } -// CHECK-64-LABEL: @read_global_bool1( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, 
ptr @global_bool1, align 8, !tbaa [[TBAA6]] +// CHECK-64-LABEL: define dso_local @read_global_bool1( +// CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] // -// CHECK-256-LABEL: @read_global_bool1( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local @read_global_bool1( +// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v32i8( poison, <32 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] // vbool1_t read_global_bool1() { return global_bool1; } -// CHECK-64-LABEL: @read_global_bool4( -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool4, align 2, !tbaa [[TBAA6]] +// CHECK-64-LABEL: define dso_local @read_global_bool4( +// CHECK-64-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-64-NEXT: [[ENTRY:.*:]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool4, align 2, !tbaa [[CHAR_TBAA6]] // CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> [[TMP0]], i64 0) // CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] // -// CHECK-256-LABEL: @read_global_bool4( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool4, align 8, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local 
@read_global_bool4( +// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool4, align 8, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] @@ -149,9 +162,10 @@ vbool1_t read_global_bool1() { return global_bool1; } vbool4_t read_global_bool4() { return global_bool4; } #if __riscv_v_fixed_vlen >= 256 -// CHECK-256-LABEL: @read_global_bool32( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr @global_bool32, align 1, !tbaa [[TBAA6]] +// CHECK-256-LABEL: define dso_local @read_global_bool32( +// CHECK-256-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr @global_bool32, align 1, !tbaa [[CHAR_TBAA6]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP1]], i64 0) @@ -159,3 +173,12 @@ vbool4_t read_global_bool4() { return global_bool4; } // vbool32_t read_global_bool32() { return global_bool32; } #endif +//. +// CHECK-64: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-64: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// CHECK-64: [[META8]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-256: [[CHAR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-256: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// CHECK-256: [[META8]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c index 896cef515743c..d25b8d84aa2d5 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: systemz-registered-target // RUN: %clang_cc1 -target-cpu z14 -triple s390x-linux-gnu \ // RUN: -O2 -fzvector -flax-vector-conversions=none \ @@ -14,124 +14,124 @@ volatile vector unsigned long long vul; // CHECK-LABEL: define dso_local void @test( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 // CHECK-NEXT: [[ADD_I:%.*]] = add nsw i128 [[TMP3]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[ADD_I]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, 
ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to i128 // CHECK-NEXT: [[TMP9:%.*]] = tail call i128 @llvm.s390.vaccq(i128 [[TMP7]], i128 [[TMP8]]) // CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP11]] to i128 // CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP12]] to i128 // CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP13]] to i128 // CHECK-NEXT: [[TMP17:%.*]] = tail call i128 @llvm.s390.vacq(i128 [[TMP14]], i128 [[TMP15]], i128 [[TMP16]]) // CHECK-NEXT: [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa 
[[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP19]] to i128 // CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i8> [[TMP20]] to i128 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP21]] to i128 // CHECK-NEXT: [[TMP25:%.*]] = tail call i128 @llvm.s390.vacccq(i128 [[TMP22]], i128 [[TMP23]], i128 [[TMP24]]) // CHECK-NEXT: [[TMP26:%.*]] = bitcast i128 [[TMP25]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i8> [[TMP27]] to i128 // CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[TMP28]] to i128 // CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i128 [[TMP29]], [[TMP30]] // CHECK-NEXT: [[TMP31:%.*]] = bitcast i128 [[SUB_I]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP34:%.*]] = 
bitcast <16 x i8> [[TMP32]] to i128 // CHECK-NEXT: [[TMP35:%.*]] = bitcast <16 x i8> [[TMP33]] to i128 // CHECK-NEXT: [[TMP36:%.*]] = tail call i128 @llvm.s390.vscbiq(i128 [[TMP34]], i128 [[TMP35]]) // CHECK-NEXT: [[TMP37:%.*]] = bitcast i128 [[TMP36]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i8> [[TMP38]] to i128 // CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i8> [[TMP39]] to i128 // CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i8> [[TMP40]] to i128 // CHECK-NEXT: [[TMP44:%.*]] = tail call i128 @llvm.s390.vsbiq(i128 [[TMP41]], i128 [[TMP42]], i128 [[TMP43]]) // CHECK-NEXT: [[TMP45:%.*]] = bitcast i128 [[TMP44]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, 
ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP49:%.*]] = bitcast <16 x i8> [[TMP46]] to i128 // CHECK-NEXT: [[TMP50:%.*]] = bitcast <16 x i8> [[TMP47]] to i128 // CHECK-NEXT: [[TMP51:%.*]] = bitcast <16 x i8> [[TMP48]] to i128 // CHECK-NEXT: [[TMP52:%.*]] = tail call i128 @llvm.s390.vsbcbiq(i128 [[TMP49]], i128 [[TMP50]], i128 [[TMP51]]) // CHECK-NEXT: [[TMP53:%.*]] = bitcast i128 [[TMP52]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP56:%.*]] = tail call i128 @llvm.s390.vsumqf(<4 x i32> [[TMP54]], <4 x i32> [[TMP55]]) // CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP60:%.*]] = tail call i128 @llvm.s390.vsumqg(<2 x i64> [[TMP58]], <2 x i64> [[TMP59]]) // CHECK-NEXT: [[TMP61:%.*]] = bitcast i128 [[TMP60]] to <16 x i8> -// CHECK-NEXT: store 
volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP64:%.*]] = tail call i128 @llvm.s390.vgfmg(<2 x i64> [[TMP62]], <2 x i64> [[TMP63]]) // CHECK-NEXT: [[TMP65:%.*]] = bitcast i128 [[TMP64]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x i8> [[TMP68]] to i128 // CHECK-NEXT: [[TMP70:%.*]] = tail call i128 @llvm.s390.vgfmag(<2 x i64> [[TMP66]], <2 x i64> [[TMP67]], i128 [[TMP69]]) // CHECK-NEXT: [[TMP71:%.*]] = bitcast i128 [[TMP70]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: 
[[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP75:%.*]] = bitcast <16 x i8> [[TMP74]] to i128 // CHECK-NEXT: [[TMP76:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP72]], <2 x i64> [[TMP73]], i128 [[TMP75]], i32 0) // CHECK-NEXT: [[TMP77:%.*]] = bitcast i128 [[TMP76]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP81:%.*]] = bitcast <16 x i8> [[TMP80]] to i128 // CHECK-NEXT: [[TMP82:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP78]], <2 x i64> [[TMP79]], i128 [[TMP81]], i32 4) // CHECK-NEXT: [[TMP83:%.*]] = bitcast i128 [[TMP82]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] 
-// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP87:%.*]] = bitcast <16 x i8> [[TMP86]] to i128 // CHECK-NEXT: [[TMP88:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP84]], <2 x i64> [[TMP85]], i128 [[TMP87]], i32 8) // CHECK-NEXT: [[TMP89:%.*]] = bitcast i128 [[TMP88]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP93:%.*]] = bitcast <16 x i8> [[TMP92]] to i128 // CHECK-NEXT: [[TMP94:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP90]], <2 x i64> [[TMP91]], i128 [[TMP93]], i32 12) // CHECK-NEXT: [[TMP95:%.*]] = bitcast i128 [[TMP94]] to <16 x i8> -// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 
8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[TMP98:%.*]] = tail call <2 x i64> @llvm.s390.vbperm(<16 x i8> [[TMP96]], <16 x i8> [[TMP97]]) -// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test(void) { @@ -159,7 +159,7 @@ void test(void) { vul = vec_bperm_u128(vuc, vuc); } //. -// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[CHAR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} //. diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c index e3db2063312d2..5f3b0ec546462 100644 --- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c +++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s // // Test GNU atomic builtins for __int128 aligned to 16 bytes, which should be @@ -13,21 +13,23 @@ __int128 Val __attribute__((aligned(16))); __int128 Exp __attribute__((aligned(16))); __int128 Des __attribute__((aligned(16))); -// CHECK-LABEL: @f1( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f1( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) 
[[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2:![0-9]+]] // CHECK-NEXT: ret void // __int128 f1() { return __atomic_load_n(&Ptr, memory_order_seq_cst); } -// CHECK-LABEL: @f2( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f2( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16 // CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 16 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f2() { @@ -35,9 +37,10 @@ __int128 f2() { return Ret; } -// CHECK-LABEL: @f3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f3( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16 // CHECK-NEXT: ret void // @@ -45,8 +48,9 @@ void f3() { __atomic_store_n(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f4( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f4( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16 // CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16 // CHECK-NEXT: ret void @@ -55,23 
+59,25 @@ void f4() { __atomic_store(&Ptr, &Val, memory_order_seq_cst); } -// CHECK-LABEL: @f5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f5( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f5() { return __atomic_exchange_n(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f6( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f6( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16 // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f6() { @@ -79,18 +85,19 @@ __int128 f6() { return Ret; } -// CHECK-LABEL: @f7( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local noundef zeroext i1 @f7( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa 
[[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 16 // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 -// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]] -// CHECK: cmpxchg.store_expected: +// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] +// CHECK: [[CMPXCHG_STORE_EXPECTED]]: // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0 // CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16 -// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]] -// CHECK: cmpxchg.continue: +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] +// CHECK: [[CMPXCHG_CONTINUE]]: // CHECK-NEXT: ret i1 [[TMP3]] // _Bool f7() { @@ -98,18 +105,19 @@ _Bool f7() { memory_order_seq_cst, memory_order_seq_cst); } -// CHECK-LABEL: @f8( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef zeroext i1 @f8( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 16 // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 -// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]] -// CHECK: cmpxchg.store_expected: +// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] +// CHECK: [[CMPXCHG_STORE_EXPECTED]]: // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0 // CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16 -// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]] -// CHECK: cmpxchg.continue: +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] +// CHECK: [[CMPXCHG_CONTINUE]]: // CHECK-NEXT: ret i1 [[TMP3]] // _Bool 
f8() { @@ -117,141 +125,159 @@ _Bool f8() { memory_order_seq_cst, memory_order_seq_cst); } -// CHECK-LABEL: @f9( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f9( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f9() { return __atomic_add_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f10( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f10( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f10() { return __atomic_sub_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f11( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local void @f11( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f11() { return __atomic_and_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f12( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f12( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f12() { return __atomic_xor_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f13( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f13( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f13() { return __atomic_or_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f14( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f14( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f14() { return __atomic_nand_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f15( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f15( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 
[[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f15() { return __atomic_fetch_add(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f16( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f16() { return __atomic_fetch_sub(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f17( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f17( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f17() { return __atomic_fetch_and(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f18( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr 
@Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f18( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f18() { return __atomic_fetch_xor(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f19( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f19( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f19() { return __atomic_fetch_or(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f20( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f20( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa 
[[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f20() { return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst); } +//. +// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c index 8759df7b19c63..3ac5959a29dcb 100644 --- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c +++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s // // Test GNU atomic builtins for __int128 (with default alignment of 8 bytes @@ -18,21 +18,23 @@ __int128 Des; // pass. It seems that a 'writable' attribute should now be added to the argument // in order for this optimization to proceed. 
-// CHECK-LABEL: @f1( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f1( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2:![0-9]+]] // CHECK-NEXT: ret void // __int128 f1() { return __atomic_load_n(&Ptr, memory_order_seq_cst); } -// CHECK-LABEL: @f2( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f2( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8 // CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 8 -// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f2() { @@ -40,9 +42,10 @@ __int128 f2() { return Ret; } -// CHECK-LABEL: @f3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f3( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 8 // CHECK-NEXT: ret void // @@ -50,8 +53,9 @@ void f3() { __atomic_store_n(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f4( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f4( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] 
{ +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8 // CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 8 // CHECK-NEXT: ret void @@ -60,23 +64,25 @@ void f4() { __atomic_store(&Ptr, &Val, memory_order_seq_cst); } -// CHECK-LABEL: @f5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f5( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f5() { return __atomic_exchange_n(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f6( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f6( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8 // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f6() { @@ -84,18 +90,19 @@ __int128 f6() { return Ret; } -// CHECK-LABEL: @f7( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define 
dso_local noundef zeroext i1 @f7( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 8 // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 8 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 -// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]] -// CHECK: cmpxchg.store_expected: +// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] +// CHECK: [[CMPXCHG_STORE_EXPECTED]]: // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0 // CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8 -// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]] -// CHECK: cmpxchg.continue: +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] +// CHECK: [[CMPXCHG_CONTINUE]]: // CHECK-NEXT: ret i1 [[TMP3]] // _Bool f7() { @@ -103,18 +110,19 @@ _Bool f7() { memory_order_seq_cst, memory_order_seq_cst); } -// CHECK-LABEL: @f8( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef zeroext i1 @f8( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 8 // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 8 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 -// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]] -// CHECK: cmpxchg.store_expected: +// CHECK-NEXT: br i1 [[TMP3]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] +// CHECK: [[CMPXCHG_STORE_EXPECTED]]: // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0 // CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8 -// CHECK-NEXT: br 
label [[CMPXCHG_CONTINUE]] -// CHECK: cmpxchg.continue: +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] +// CHECK: [[CMPXCHG_CONTINUE]]: // CHECK-NEXT: ret i1 [[TMP3]] // _Bool f8() { @@ -122,141 +130,159 @@ _Bool f8() { memory_order_seq_cst, memory_order_seq_cst); } -// CHECK-LABEL: @f9( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f9( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f9() { return __atomic_add_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f10( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f10( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f10() { return __atomic_sub_fetch(&Ptr, Val, 
memory_order_seq_cst); } -// CHECK-LABEL: @f11( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f11( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f11() { return __atomic_and_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f12( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f12( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f12() { return __atomic_xor_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f13( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f13( +// CHECK-SAME: ptr dead_on_unwind noalias 
writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f13() { return __atomic_or_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f14( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f14( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f14() { return __atomic_nand_fetch(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f15( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f15( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = 
load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f15() { return __atomic_fetch_add(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f16( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f16() { return __atomic_fetch_sub(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f17( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f17( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 
f17() { return __atomic_fetch_and(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f18( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f18( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f18() { return __atomic_fetch_xor(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f19( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f19( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f19() { return __atomic_fetch_or(&Ptr, Val, memory_order_seq_cst); } -// CHECK-LABEL: @f20( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f20( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) 
[[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f20() { return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst); } +//. +// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c index e80f2b6920845..601bd7fa16153 100644 --- a/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c +++ b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - \ // RUN: | FileCheck %s // @@ -10,149 +10,162 @@ __int128 Ptr __attribute__((aligned(16))); __int128 Val __attribute__((aligned(16))); __int128 OldVal __attribute__((aligned(16))); -// CHECK-LABEL: @f1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @f1( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa 
[[__INT128_TBAA2:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f1() { return __sync_fetch_and_add(&Ptr, Val); } -// CHECK-LABEL: @f2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f2( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f2() { return __sync_fetch_and_sub(&Ptr, Val); } -// CHECK-LABEL: @f3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f3( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f3() { return __sync_fetch_and_or(&Ptr, Val); } -// CHECK-LABEL: @f4( 
-// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f4( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f4() { return __sync_fetch_and_and(&Ptr, Val); } -// CHECK-LABEL: @f5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f5( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f5() { return __sync_fetch_and_xor(&Ptr, Val); } -// CHECK-LABEL: @f6( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f6( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, 
align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f6() { return __sync_fetch_and_nand(&Ptr, Val); } -// CHECK-LABEL: @f7( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f7( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f7() { return __sync_add_and_fetch(&Ptr, Val); } -// CHECK-LABEL: @f8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f8( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa 
[[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f8() { return __sync_sub_and_fetch(&Ptr, Val); } -// CHECK-LABEL: @f9( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f9( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f9() { return __sync_or_and_fetch(&Ptr, Val); } -// CHECK-LABEL: @f10( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f10( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f10() { return __sync_and_and_fetch(&Ptr, Val); } -// CHECK-LABEL: @f11( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f11( +// 
CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]] -// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f11() { return __sync_xor_and_fetch(&Ptr, Val); } -// CHECK-LABEL: @f12( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f12( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 // CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f12() { return __sync_nand_and_fetch(&Ptr, Val); } -// CHECK-LABEL: @f13( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local zeroext i1 @f13( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr 
@OldVal, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1 // CHECK-NEXT: ret i1 [[TMP3]] @@ -161,32 +174,35 @@ _Bool f13() { return __sync_bool_compare_and_swap(&Ptr, OldVal, Val); } -// CHECK-LABEL: @f14( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f14( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0 -// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f14() { return __sync_val_compare_and_swap(&Ptr, OldVal, Val); } -// CHECK-LABEL: @f15( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f15( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw 
xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f15() { return __sync_lock_test_and_set(&Ptr, Val); } -// CHECK-LABEL: @f16( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f16( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: store atomic i128 0, ptr @Ptr release, align 16 // CHECK-NEXT: ret void // @@ -194,11 +210,12 @@ void f16() { return __sync_lock_release(&Ptr); } -// CHECK-LABEL: @f17( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @f17( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: ret void // __int128 f17() { @@ -206,14 +223,21 @@ __int128 f17() { } // Test that a statement expression compiles. 
-// CHECK-LABEL: @f18( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @f18( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[T_ADDR:%.*]] = alloca i128, align 8 -// CHECK-NEXT: [[T:%.*]] = load i128, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store i128 [[T]], ptr [[T_ADDR]], align 8, !tbaa [[TBAA2]] +// CHECK-NEXT: [[T:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[__INT128_TBAA2]] +// CHECK-NEXT: store i128 [[T]], ptr [[T_ADDR]], align 8, !tbaa [[__INT128_TBAA2]] // CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[T_ADDR]], i128 [[T]], i128 [[T]] seq_cst seq_cst, align 16 // CHECK-NEXT: ret void // void f18(__int128 t) { __sync_bool_compare_and_swap(({int x = 1; &t;}), t, t); } +//. +// CHECK: [[__INT128_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"__int128", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGen/SystemZ/zvector2.c b/clang/test/CodeGen/SystemZ/zvector2.c index b021ae8534353..f00fcdd52c401 100644 --- a/clang/test/CodeGen/SystemZ/zvector2.c +++ b/clang/test/CodeGen/SystemZ/zvector2.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple s390x-linux-gnu -target-cpu z14 -fzvector \ // RUN: -O -emit-llvm -o - -W -Wall -Werror %s | FileCheck %s @@ -8,8 +8,8 @@ volatile vector bool int bi; // CHECK-LABEL: define dso_local void @test_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3:![0-9]+]] -// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3:![0-9]+]] +// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_assign (void) @@ -20,8 +20,8 @@ void test_assign (void) // CHECK-LABEL: define dso_local void @test_pos( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[TMP0]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_pos (void) @@ -32,9 +32,9 @@ void test_pos (void) // CHECK-LABEL: define dso_local void @test_neg( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x 
float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[TMP0]] -// CHECK-NEXT: store volatile <4 x float> [[FNEG]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[FNEG]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_neg (void) @@ -45,9 +45,9 @@ void test_neg (void) // CHECK-LABEL: define dso_local void @test_preinc( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[INC:%.*]] = fadd <4 x float> [[TMP0]], splat (float 1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_preinc (void) @@ -58,9 +58,9 @@ void test_preinc (void) // CHECK-LABEL: define dso_local void @test_postinc( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[INC:%.*]] = fadd <4 x float> [[TMP0]], splat (float 1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[INC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_postinc (void) @@ -71,9 +71,9 @@ void test_postinc (void) // CHECK-LABEL: define dso_local void @test_predec( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x 
float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[DEC:%.*]] = fadd <4 x float> [[TMP0]], splat (float -1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_predec (void) @@ -84,9 +84,9 @@ void test_predec (void) // CHECK-LABEL: define dso_local void @test_postdec( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[DEC:%.*]] = fadd <4 x float> [[TMP0]], splat (float -1.000000e+00) -// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DEC]], ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_postdec (void) @@ -97,10 +97,10 @@ void test_postdec (void) // CHECK-LABEL: define dso_local void @test_add( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_add (void) @@ -111,10 +111,10 @@ 
void test_add (void) // CHECK-LABEL: define dso_local void @test_add_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[ADD]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_add_assign (void) @@ -125,10 +125,10 @@ void test_add_assign (void) // CHECK-LABEL: define dso_local void @test_sub( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_sub (void) @@ -139,10 +139,10 @@ void test_sub (void) // CHECK-LABEL: define dso_local void @test_sub_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x 
float>, ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[TMP1]], [[TMP0]] -// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[SUB]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_sub_assign (void) @@ -153,10 +153,10 @@ void test_sub_assign (void) // CHECK-LABEL: define dso_local void @test_mul( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_mul (void) @@ -167,10 +167,10 @@ void test_mul (void) // CHECK-LABEL: define dso_local void @test_mul_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[TMP0]], 
[[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[MUL]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_mul_assign (void) @@ -181,10 +181,10 @@ void test_mul_assign (void) // CHECK-LABEL: define dso_local void @test_div( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_div (void) @@ -195,10 +195,10 @@ void test_div (void) // CHECK-LABEL: define dso_local void @test_div_assign( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x float> [[TMP1]], [[TMP0]] -// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x float> [[DIV]], ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_div_assign (void) @@ -209,11 +209,11 @@ void test_div_assign (void) // 
CHECK-LABEL: define dso_local void @test_cmpeq( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_cmpeq (void) @@ -224,11 +224,11 @@ void test_cmpeq (void) // CHECK-LABEL: define dso_local void @test_cmpne( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[CMP:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_cmpne (void) @@ -239,11 +239,11 @@ void test_cmpne (void) // CHECK-LABEL: define dso_local void @test_cmpge( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 
x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[CMP:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_cmpge (void) @@ -254,11 +254,11 @@ void test_cmpge (void) // CHECK-LABEL: define dso_local void @test_cmpgt( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_cmpgt (void) @@ -269,11 +269,11 @@ void test_cmpgt (void) // CHECK-LABEL: define dso_local void @test_cmple( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load 
volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_cmple (void) @@ -284,11 +284,11 @@ void test_cmple (void) // CHECK-LABEL: define dso_local void @test_cmplt( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x float>, ptr @ff, align 8, !tbaa [[CHAR_TBAA3]] +// CHECK-NEXT: [[TMP1:%.*]] = load volatile <4 x float>, ptr @ff2, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store volatile <4 x i32> [[SEXT]], ptr @bi, align 8, !tbaa [[CHAR_TBAA3]] // CHECK-NEXT: ret void // void test_cmplt (void) @@ -297,7 +297,7 @@ void test_cmplt (void) } //. -// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[CHAR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} //. 
diff --git a/clang/test/CodeGen/allow-ubsan-check.c b/clang/test/CodeGen/allow-ubsan-check.c index 6de7676951c90..8d30e29886046 100644 --- a/clang/test/CodeGen/allow-ubsan-check.c +++ b/clang/test/CodeGen/allow-ubsan-check.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // // We can't use -fsanitize-skip-hot-cutoff because that includes both -ubsan-guard-checks and //-lower-allow-check-percentile-cutoff. @@ -98,7 +98,7 @@ int div(int x, int y) { // CHECK-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META2]] // CHECK-NEXT: unreachable, !nosanitize [[META2]] // CHECK: [[CONT]]: -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA5:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] // CHECK-NEXT: ret i32 [[TMP2]] // // TR-LABEL: define dso_local i32 @null( @@ -112,7 +112,7 @@ int div(int x, int y) { // TR-NEXT: tail call void @llvm.ubsantrap(i8 22) #[[ATTR7]], !nosanitize [[META2]] // TR-NEXT: unreachable, !nosanitize [[META2]] // TR: [[CONT]]: -// TR-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA5:![0-9]+]] +// TR-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] // TR-NEXT: ret i32 [[TMP2]] // // REC-LABEL: define dso_local i32 @null( @@ -126,7 +126,7 @@ int div(int x, int y) { // REC-NEXT: tail call void @__ubsan_handle_type_mismatch_v1(ptr nonnull @[[GLOB2:[0-9]+]], i64 0) #[[ATTR6]], !nosanitize [[META2]] // REC-NEXT: br label %[[CONT]], !nosanitize [[META2]] // REC: [[CONT]]: -// REC-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA5:![0-9]+]] +// REC-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] // REC-NEXT: ret i32 [[TMP2]] // int null(int* x) { @@ -205,7 
+205,7 @@ void use(double*); // CHECK-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]] // CHECK: [[BB4]]: // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA9:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]] // CHECK-NEXT: ret double [[TMP5]] // CHECK: [[TRAP]]: // CHECK-NEXT: call void @__ubsan_handle_local_out_of_bounds_abort() #[[ATTR6]], !nosanitize [[META2]] @@ -224,7 +224,7 @@ void use(double*); // TR-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]] // TR: [[BB4]]: // TR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]] -// TR-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA9:![0-9]+]] +// TR-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]] // TR-NEXT: ret double [[TMP5]] // TR: [[TRAP]]: // TR-NEXT: call void @llvm.ubsantrap(i8 71) #[[ATTR7]], !nosanitize [[META2]] @@ -243,7 +243,7 @@ void use(double*); // REC-NEXT: br i1 [[TMP3]], label %[[TRAP:.*]], label %[[BB4:.*]] // REC: [[BB4]]: // REC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[IDXPROM]] -// REC-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA9:![0-9]+]] +// REC-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA9:![0-9]+]] // REC-NEXT: ret double [[TMP5]] // REC: [[TRAP]]: // REC-NEXT: call void @__ubsan_handle_local_out_of_bounds() #[[ATTR6]], !nosanitize [[META2]] @@ -259,30 +259,30 @@ double lbounds(int b, int i) { // CHECK: [[META2]] = !{} // CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} // CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575} -// CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} // CHECK: 
[[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} // CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} // CHECK: [[META8]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} // CHECK: [[META10]] = !{!"double", [[META7]], i64 0} //. // TR: [[META2]] = !{} // TR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} // TR: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575} -// TR: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// TR: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} // TR: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} // TR: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} // TR: [[META8]] = !{!"Simple C/C++ TBAA"} -// TR: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// TR: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} // TR: [[META10]] = !{!"double", [[META7]], i64 0} //. // REC: [[META2]] = !{} // REC: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} // REC: [[PROF4]] = !{!"branch_weights", i32 1, i32 1048575} -// REC: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// REC: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} // REC: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} // REC: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} // REC: [[META8]] = !{!"Simple C/C++ TBAA"} -// REC: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// REC: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} // REC: [[META10]] = !{!"double", [[META7]], i64 0} //. 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index c5a410193bfb7..847ce67fcc31b 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 @@ -28,24 +28,27 @@ DEFINE_STRUCT(bool) // int64 //===----------------------------------------------------------------------===// -// CHECK-128-LABEL: @read_int64( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-128-LABEL: define dso_local @read_int64( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-256-LABEL: @read_int64( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[Y:%.*]] = 
getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-256-LABEL: define dso_local @read_int64( +// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v4i64( poison, <4 x i64> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-512-LABEL: @read_int64( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64 -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-512-LABEL: define dso_local @read_int64( +// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v8i64( poison, <8 x i64> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -53,25 +56,28 @@ svint64_t read_int64(struct struct_int64 *s) { return s->y[0]; } -// CHECK-128-LABEL: @write_int64( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[X:%.*]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-128-LABEL: define dso_local void @write_int64( +// CHECK-128-SAME: ptr noundef 
writeonly captures(none) initializes((16, 32)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[X]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: ret void // -// CHECK-256-LABEL: @write_int64( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64( [[X:%.*]], i64 0) -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-256-LABEL: define dso_local void @write_int64( +// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64( [[X]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: store <4 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-256-NEXT: ret void // -// CHECK-512-LABEL: @write_int64( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[X:%.*]], i64 0) -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64 -// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-512-LABEL: define dso_local void @write_int64( +// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((64, 128)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x 
i64> @llvm.vector.extract.v8i64.nxv2i64( [[X]], i64 0) +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 +// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: ret void // void write_int64(struct struct_int64 *s, svint64_t x) { @@ -82,24 +88,27 @@ void write_int64(struct struct_int64 *s, svint64_t x) { // float64 //===----------------------------------------------------------------------===// -// CHECK-128-LABEL: @read_float64( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-128-LABEL: define dso_local @read_float64( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2f64.v2f64( poison, <2 x double> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-256-LABEL: @read_float64( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-256-LABEL: define dso_local @read_float64( +// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2f64.v4f64( poison, <4 x double> [[TMP0]], i64 0) // 
CHECK-256-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-512-LABEL: @read_float64( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64 -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-512-LABEL: define dso_local @read_float64( +// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2f64.v8f64( poison, <8 x double> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -107,25 +116,28 @@ svfloat64_t read_float64(struct struct_float64 *s) { return s->y[0]; } -// CHECK-128-LABEL: @write_float64( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64( [[X:%.*]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-128-LABEL: define dso_local void @write_float64( +// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64( [[X]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: store <2 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: ret void // -// CHECK-256-LABEL: @write_float64( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv2f64( [[X:%.*]], i64 0) -// 
CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-256-LABEL: define dso_local void @write_float64( +// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv2f64( [[X]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: store <4 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-256-NEXT: ret void // -// CHECK-512-LABEL: @write_float64( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x double> @llvm.vector.extract.v8f64.nxv2f64( [[X:%.*]], i64 0) -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64 -// CHECK-512-NEXT: store <8 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-512-LABEL: define dso_local void @write_float64( +// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((64, 128)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x double> @llvm.vector.extract.v8f64.nxv2f64( [[X]], i64 0) +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 +// CHECK-512-NEXT: store <8 x double> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: ret void // void write_float64(struct struct_float64 *s, svfloat64_t x) { @@ -136,24 +148,27 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) { // bfloat16 //===----------------------------------------------------------------------===// -// CHECK-128-LABEL: @read_bfloat16( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, 
ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-128-LABEL: define dso_local @read_bfloat16( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP0]], i64 0) // CHECK-128-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-256-LABEL: @read_bfloat16( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-256-LABEL: define dso_local @read_bfloat16( +// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 32 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v16bf16( poison, <16 x bfloat> [[TMP0]], i64 0) // CHECK-256-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-512-LABEL: @read_bfloat16( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64 -// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-512-LABEL: define dso_local @read_bfloat16( +// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 +// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr 
[[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v32bf16( poison, <32 x bfloat> [[TMP0]], i64 0) // CHECK-512-NEXT: ret [[CAST_SCALABLE]] // @@ -161,25 +176,28 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { return s->y[0]; } -// CHECK-128-LABEL: @write_bfloat16( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16( [[X:%.*]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-128-LABEL: define dso_local void @write_bfloat16( +// CHECK-128-SAME: ptr noundef writeonly captures(none) initializes((16, 32)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16( [[X]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 16 +// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: ret void // -// CHECK-256-LABEL: @write_bfloat16( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16( [[X:%.*]], i64 0) -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: store <16 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-256-LABEL: define dso_local void @write_bfloat16( +// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((32, 64)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16( [[X]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr 
[[S]], i64 32 +// CHECK-256-NEXT: store <16 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-256-NEXT: ret void // -// CHECK-512-LABEL: @write_bfloat16( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16( [[X:%.*]], i64 0) -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 64 -// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[TBAA2]] +// CHECK-512-LABEL: define dso_local void @write_bfloat16( +// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((64, 128)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16( [[X]], i64 0) +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 64 +// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr [[Y]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: ret void // void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { @@ -190,26 +208,29 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { // bool //===----------------------------------------------------------------------===// -// CHECK-128-LABEL: @read_bool( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 2 -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]] +// CHECK-128-LABEL: define dso_local @read_bool( +// CHECK-128-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 2 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> 
[[TMP0]], i64 0) // CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: ret [[TMP1]] // -// CHECK-256-LABEL: @read_bool( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 4 -// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]] +// CHECK-256-LABEL: define dso_local @read_bool( +// CHECK-256-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] // CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v4i8( poison, <4 x i8> [[TMP0]], i64 0) // CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] // -// CHECK-512-LABEL: @read_bool( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]] +// CHECK-512-LABEL: define dso_local @read_bool( +// CHECK-512-SAME: ptr noundef readonly captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) // CHECK-512-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-512-NEXT: ret [[TMP1]] @@ -218,30 +239,46 @@ svbool_t read_bool(struct struct_bool *s) { return s->y[0]; } -// CHECK-128-LABEL: @write_bool( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-128-LABEL: define dso_local void @write_bool( +// CHECK-128-SAME: ptr 
noundef writeonly captures(none) initializes((2, 4)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 2 -// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[TBAA2]] +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 2 +// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: ret void // -// CHECK-256-LABEL: @write_bool( -// CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-256-LABEL: define dso_local void @write_bool( +// CHECK-256-SAME: ptr noundef writeonly captures(none) initializes((4, 8)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-256-NEXT: [[ENTRY:.*:]] +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 4 -// CHECK-256-NEXT: store <4 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[TBAA2]] +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4 +// CHECK-256-NEXT: store <4 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] // CHECK-256-NEXT: ret void // -// CHECK-512-LABEL: @write_bool( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-512-LABEL: define dso_local void @write_bool( +// CHECK-512-SAME: ptr noundef writeonly captures(none) initializes((8, 16)) [[S:%.*]], [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast [[X]] to // CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x 
i8> @llvm.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 8 -// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[TBAA2]] +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 8 +// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 2, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: ret void // void write_bool(struct struct_bool *s, svbool_t x) { s->y[0] = x; } +//. +// CHECK-128: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-128: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK-128: [[META4]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-256: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-256: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK-256: [[META4]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-512: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-512: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK-512: [[META4]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index fcd4314249ff8..bdaebf7ec1da7 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s // REQUIRES: aarch64-registered-target @@ -12,66 +12,74 @@ typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); typedef int32_t gnu_int32_t __attribute__((vector_size(N / 8))); -// CHECK-LABEL: @to_svint32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// CHECK-LABEL: define dso_local noundef @to_svint32_t( +// CHECK-SAME: noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE_COERCE]] // svint32_t to_svint32_t(fixed_int32_t type) { return type; } -// CHECK-LABEL: @from_svint32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_svint32_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_int32_t from_svint32_t(svint32_t type) { return type; } -// CHECK-LABEL: @to_svfloat64_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// CHECK-LABEL: define dso_local noundef @to_svfloat64_t( +// CHECK-SAME: noundef returned [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE_COERCE]] // svfloat64_t to_svfloat64_t(fixed_float64_t type) { return type; } -// CHECK-LABEL: @from_svfloat64_t( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_svfloat64_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_float64_t from_svfloat64_t(svfloat64_t type) { return type; } -// CHECK-LABEL: @to_svbool_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TMP0:%.*]] +// CHECK-LABEL: define dso_local noundef @to_svbool_t( +// CHECK-SAME: noundef returned [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TMP0]] // svbool_t to_svbool_t(fixed_bool_t type) { return type; } -// CHECK-LABEL: @from_svbool_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-LABEL: define dso_local @from_svbool_t( +// CHECK-SAME: returned [[TYPE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret [[TYPE]] // fixed_bool_t from_svbool_t(svbool_t type) { return type; } -// CHECK-LABEL: @lax_cast( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[TYPE_COERCE:%.*]] to +// CHECK-LABEL: define dso_local noundef @lax_cast( +// CHECK-SAME: noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[TYPE_COERCE]] to // CHECK-NEXT: ret [[TMP0]] // svint64_t lax_cast(fixed_int32_t type) { return type; } -// CHECK-LABEL: @to_svint32_t__from_gnu_int32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local @to_svint32_t__from_gnu_int32_t( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> 
[[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -79,19 +87,21 @@ svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { return type; } -// CHECK-LABEL: @from_svint32_t__to_gnu_int32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE:%.*]], i64 0) -// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @from_svint32_t__to_gnu_int32_t( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i32>) align 16 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], [[TYPE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE]], i64 0) +// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { return type; } -// CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local @to_fixed_int32_t__from_gnu_int32_t( +// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -99,12 +109,18 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) { return type; } -// CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE_COERCE:%.*]], i64 0) -// 
CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @from_fixed_int32_t__to_gnu_int32_t( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<16 x i32>) align 16 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], noundef [[TYPE_COERCE:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE_COERCE]], i64 0) +// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) { return type; } +//. +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index 011518c60f52f..b604a06d76a30 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-128 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-512 @@ -20,46 +20,52 @@ fixed_bool_t global_bool; // WRITES //===----------------------------------------------------------------------===// -// CHECK-128-LABEL: @write_global_i64( -// CHECK-128-NEXT: 
entry: -// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[V:%.*]], i64 0) -// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-LABEL: define void @write_global_i64( +// CHECK-128-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[V]], i64 0) +// CHECK-128-NEXT: store <2 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-128-NEXT: ret void // -// CHECK-512-LABEL: @write_global_i64( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[V:%.*]], i64 0) -// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]] +// CHECK-512-LABEL: define void @write_global_i64( +// CHECK-512-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[V]], i64 0) +// CHECK-512-NEXT: store <8 x i64> [[CAST_FIXED]], ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-512-NEXT: ret void // void write_global_i64(svint64_t v) { global_i64 = v; } -// CHECK-128-LABEL: @write_global_bf16( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16( [[V:%.*]], i64 0) -// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], ptr @global_bf16, align 16, !tbaa [[TBAA6]] +// CHECK-128-LABEL: define void @write_global_bf16( +// CHECK-128-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16( [[V]], i64 0) +// CHECK-128-NEXT: store <8 x bfloat> [[CAST_FIXED]], 
ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: ret void // -// CHECK-512-LABEL: @write_global_bf16( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16( [[V:%.*]], i64 0) -// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], ptr @global_bf16, align 16, !tbaa [[TBAA6]] +// CHECK-512-LABEL: define void @write_global_bf16( +// CHECK-512-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16( [[V]], i64 0) +// CHECK-512-NEXT: store <32 x bfloat> [[CAST_FIXED]], ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: ret void // void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } -// CHECK-128-LABEL: @write_global_bool( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to -// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-128-NEXT: store <2 x i8> [[CASTFIXEDSVE]], ptr @global_bool, align 2, !tbaa [[TBAA6]] +// CHECK-128-LABEL: define void @write_global_bool( +// CHECK-128-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[V]] to +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0) +// CHECK-128-NEXT: store <2 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] // CHECK-128-NEXT: ret void // -// CHECK-512-LABEL: @write_global_bool( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to -// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0) -// CHECK-512-NEXT: store <8 x i8> [[CASTFIXEDSVE]], ptr @global_bool, align 2, !tbaa [[TBAA6]] +// CHECK-512-LABEL: define void @write_global_bool( 
+// CHECK-512-SAME: [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast [[V]] to +// CHECK-512-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0) +// CHECK-512-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] // CHECK-512-NEXT: ret void // void write_global_bool(svbool_t v) { global_bool = v; } @@ -68,46 +74,61 @@ void write_global_bool(svbool_t v) { global_bool = v; } // READS //===----------------------------------------------------------------------===// -// CHECK-128-LABEL: @read_global_i64( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @global_i64, align 16, !tbaa [[TBAA6]] -// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP0]], i64 0) -// CHECK-128-NEXT: ret [[CASTSCALABLESVE]] +// CHECK-128-LABEL: define @read_global_i64( +// CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP0]], i64 0) +// CHECK-128-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-512-LABEL: @read_global_i64( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @global_i64, align 16, !tbaa [[TBAA6]] -// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i64.v8i64( poison, <8 x i64> [[TMP0]], i64 0) -// CHECK-512-NEXT: ret [[CASTSCALABLESVE]] +// CHECK-512-LABEL: define @read_global_i64( +// CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @global_i64, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call 
@llvm.vector.insert.nxv2i64.v8i64( poison, <8 x i64> [[TMP0]], i64 0) +// CHECK-512-NEXT: ret [[CAST_SCALABLE]] // svint64_t read_global_i64() { return global_i64; } -// CHECK-128-LABEL: @read_global_bf16( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr @global_bf16, align 16, !tbaa [[TBAA6]] -// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP0]], i64 0) -// CHECK-128-NEXT: ret [[CASTSCALABLESVE]] +// CHECK-128-LABEL: define @read_global_bf16( +// CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP0]], i64 0) +// CHECK-128-NEXT: ret [[CAST_SCALABLE]] // -// CHECK-512-LABEL: @read_global_bf16( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr @global_bf16, align 16, !tbaa [[TBAA6]] -// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v32bf16( poison, <32 x bfloat> [[TMP0]], i64 0) -// CHECK-512-NEXT: ret [[CASTSCALABLESVE]] +// CHECK-512-LABEL: define @read_global_bf16( +// CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr @global_bf16, align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v32bf16( poison, <32 x bfloat> [[TMP0]], i64 0) +// CHECK-512-NEXT: ret [[CAST_SCALABLE]] // svbfloat16_t read_global_bf16() { return global_bf16; } -// CHECK-128-LABEL: @read_global_bool( -// CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool, align 2, !tbaa [[TBAA6]] -// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> 
[[TMP0]], i64 0) -// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CASTSCALABLESVE]] to +// CHECK-128-LABEL: define @read_global_bool( +// CHECK-128-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-128-NEXT: [[ENTRY:.*:]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> [[TMP0]], i64 0) +// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: ret [[TMP1]] // -// CHECK-512-LABEL: @read_global_bool( -// CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool, align 2, !tbaa [[TBAA6]] -// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) -// CHECK-512-NEXT: [[TMP1:%.*]] = bitcast [[CASTSCALABLESVE]] to +// CHECK-512-LABEL: define @read_global_bool( +// CHECK-512-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-512-NEXT: [[ENTRY:.*:]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool, align 2, !tbaa [[CHAR_TBAA2]] +// CHECK-512-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i8.v8i8( poison, <8 x i8> [[TMP0]], i64 0) +// CHECK-512-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-512-NEXT: ret [[TMP1]] // svbool_t read_global_bool() { return global_bool; } +//. +// CHECK-128: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-128: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK-128: [[META4]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-512: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-512: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK-512: [[META4]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGen/attr-counted-by-for-pointers.c b/clang/test/CodeGen/attr-counted-by-for-pointers.c index 0d72b58c78fd1..f7b737d5c5039 100644 --- a/clang/test/CodeGen/attr-counted-by-for-pointers.c +++ b/clang/test/CodeGen/attr-counted-by-for-pointers.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -DWITH_ATTRS -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -fexperimental-late-parse-attributes -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITH-ATTR %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -DWITH_ATTRS -Wall -fstrict-flex-arrays=3 -fexperimental-late-parse-attributes -emit-llvm -o - %s | FileCheck --check-prefix=NO-SANITIZE-WITH-ATTR %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -fexperimental-late-parse-attributes -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITHOUT-ATTR %s @@ -29,51 +29,51 @@ struct annotated_ptr { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize 
[[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont10: +// SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA4:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA13:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa 
[[_ZTS3FOOPTR_TBAA2:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test1(struct annotated_ptr *p, int index, struct foo *value) { @@ -82,51 +82,51 @@ void test1(struct annotated_ptr *p, int index, struct foo *value) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont10: +// SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA13]] +// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]] +// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( // 
SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]] +// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] // 
NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test2(struct annotated_ptr *p, int index, struct foo *value) { @@ -135,51 +135,51 @@ void test2(struct annotated_ptr *p, int index, struct foo *value) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT10:%.*]], !prof [[PROF15:![0-9]+]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT10:.*]], !prof [[PROF15:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont10: +// SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP1]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA13]] +// SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA13]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]] +// NO-SANITIZE-WITH-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // 
SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]] +// SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[VALUE:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BUF:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BUF]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA11]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr [[VALUE]], ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS3FOOPTR_TBAA11]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test3(struct annotated_ptr *p, int index, struct foo *value) { @@ -188,7 +188,7 @@ void test3(struct annotated_ptr *p, int index, struct foo *value) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test4( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = 
load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -199,7 +199,7 @@ void test3(struct annotated_ptr *p, int index, struct foo *value) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test4( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -210,12 +210,12 @@ void test3(struct annotated_ptr *p, int index, struct foo *value) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test4(struct annotated_ptr *p) { @@ -224,7 +224,7 @@ size_t test4(struct annotated_ptr *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test5( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // 
SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -235,7 +235,7 @@ size_t test4(struct annotated_ptr *p) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869184, 17179869177) i64 @test5( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -246,12 +246,12 @@ size_t test4(struct annotated_ptr *p) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test5(struct annotated_ptr *p, int index) { @@ -260,17 +260,17 @@ size_t test5(struct annotated_ptr *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 17179869177) i64 @test6( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = 
sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT8:%.*]], !prof [[PROF15]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont8: +// SANITIZE-WITH-ATTR: [[CONT8]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0) @@ -279,7 +279,7 @@ size_t test5(struct annotated_ptr *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -34359738360, 34359738361) i64 @test6( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: 
[[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -294,12 +294,12 @@ size_t test5(struct annotated_ptr *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test6(struct annotated_ptr *p, int index) { @@ -308,32 +308,32 @@ size_t test6(struct annotated_ptr *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT10:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// 
SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont10: +// SANITIZE-WITH-ATTR: [[CONT10]]: // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test7(struct annotated_ptr *p, int index) { @@ -348,7 +348,7 @@ struct annotated_sized_ptr { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test8( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 
@llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) @@ -357,7 +357,7 @@ struct annotated_sized_ptr { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test8( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) @@ -366,12 +366,12 @@ struct annotated_sized_ptr { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test8(struct annotated_sized_ptr *p, int index) { @@ -380,17 +380,17 @@ size_t test8(struct annotated_sized_ptr *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test9( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], 
i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT8:%.*]], !prof [[PROF15]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont8: +// SANITIZE-WITH-ATTR: [[CONT8]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.smax.i64(i64 [[RESULT]], i64 0) @@ -398,7 +398,7 @@ size_t test8(struct annotated_sized_ptr *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -4294967295, 4294967296) i64 @test9( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -412,12 +412,12 @@ size_t test8(struct annotated_sized_ptr *p, int 
index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test9(struct annotated_sized_ptr *p, int index) { @@ -426,17 +426,17 @@ size_t test9(struct annotated_sized_ptr *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT8:%.*]], !prof [[PROF15]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT8:.*]], !prof [[PROF15]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont8: +// SANITIZE-WITH-ATTR: [[CONT8]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[INDEX_SIZE:%.*]] = shl nuw nsw i64 [[IDXPROM]], 2 // SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = sub nsw i64 [[COUNT]], [[INDEX_SIZE]] @@ -445,7 +445,7 @@ size_t test9(struct annotated_sized_ptr *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -10737418236, 10737418240) i64 @test10( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -460,12 +460,12 @@ size_t test9(struct annotated_sized_ptr *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test10(struct annotated_sized_ptr *p, int index) { @@ -479,7 +479,7 @@ struct 
pr151236_struct { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test11( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1 @@ -490,7 +490,7 @@ struct pr151236_struct { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test11( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64 @@ -501,12 +501,12 @@ struct pr151236_struct { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test11( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test11( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2 // size_t test11(struct pr151236_struct *p) { @@ -515,7 +515,7 @@ size_t test11(struct pr151236_struct *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local 
range(i64 -262144, 262137) i64 @test12( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1 @@ -526,7 +526,7 @@ size_t test11(struct pr151236_struct *p) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test12( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64 @@ -537,14 +537,66 @@ size_t test11(struct pr151236_struct *p) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test12( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test12( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -2 // size_t test12(struct pr151236_struct *p) { return __bdos(p->a) + __bdos(((int *)p->a)); } +//. 
+// SANITIZE-WITH-ATTR: [[META2]] = !{} +// SANITIZE-WITH-ATTR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} +// SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA4]] = !{[[META5:![0-9]+]], [[META9:![0-9]+]], i64 8} +// SANITIZE-WITH-ATTR: [[META5]] = !{!"annotated_ptr", [[META6:![0-9]+]], i64 0, [[META9]], i64 8, [[META12:![0-9]+]], i64 16} +// SANITIZE-WITH-ATTR: [[META6]] = !{!"long", [[META7:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META8]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITH-ATTR: [[META9]] = !{!"p2 _ZTS3foo", [[META10:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META10]] = !{!"any p2 pointer", [[META11:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META11]] = !{!"any pointer", [[META7]], i64 0} +// SANITIZE-WITH-ATTR: [[META12]] = !{!"int", [[META7]], i64 0} +// SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// SANITIZE-WITH-ATTR: [[META14]] = !{!"p1 _ZTS3foo", [[META11]], i64 0} +// SANITIZE-WITH-ATTR: [[PROF15]] = !{!"branch_weights", i32 1, i32 1048575} +//. 
+// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} +// NO-SANITIZE-WITH-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16} +// NO-SANITIZE-WITH-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"} +// NO-SANITIZE-WITH-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META10]] = !{!"int", [[META5]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0} +//. +// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} +// SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16} +// SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"int", [[META5]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0} +//. 
+// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} +// NO-SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"annotated_ptr", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META10:![0-9]+]], i64 16} +// NO-SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META6]] = !{!"Simple C/C++ TBAA"} +// NO-SANITIZE-WITHOUT-ATTR: [[META7]] = !{!"p2 _ZTS3foo", [[META8:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META8]] = !{!"any p2 pointer", [[META9:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"any pointer", [[META5]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"int", [[META5]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS3FOOPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"p1 _ZTS3foo", [[META9]], i64 0} +//. diff --git a/clang/test/CodeGen/attr-counted-by-pr110385.c b/clang/test/CodeGen/attr-counted-by-pr110385.c index 412c12cb687c4..32ee1c8eb5dbe 100644 --- a/clang/test/CodeGen/attr-counted-by-pr110385.c +++ b/clang/test/CodeGen/attr-counted-by-pr110385.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wno-missing-declarations -emit-llvm -o - %s | FileCheck %s // See #110385 @@ -27,17 +27,17 @@ void init(void * __attribute__((pass_dynamic_object_size(0)))); // CHECK-LABEL: define dso_local void @test1( // CHECK-SAME: ptr noundef readonly captures(none) [[FOO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[GROWABLE:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[GROWABLE]], align 8, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GROWABLE]], align 8, !tbaa [[_ZTS8VARIABLEPTR_TBAA2:![0-9]+]] // CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12 // CHECK-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 // CHECK-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 1 -// CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1 -// CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 0 -// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP4]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 +// CHECK-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nsw i64 [[COUNT]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1 +// CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0 +// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP2]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // void test1(struct bucket *foo) { @@ -46,22 +46,22 @@ void test1(struct bucket *foo) { // CHECK-LABEL: define dso_local void @test2( // CHECK-SAME: ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 16 // CHECK-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 12 // CHECK-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1 -// CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 
-// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 +// CHECK-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nsw i64 [[COUNT]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1 +// CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0 +// CHECK-NEXT: tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP1]]) #[[ATTR2]] // CHECK-NEXT: ret void // void test2(struct bucket2 *foo) { init(foo->growable.array); } //. -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} +// CHECK: [[_ZTS8VARIABLEPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8} // CHECK: [[META3]] = !{!"bucket", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META4]], i64 16} // CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c index cb23efdb8f263..9675fe21be366 100644 --- a/clang/test/CodeGen/attr-counted-by.c +++ b/clang/test/CodeGen/attr-counted-by.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -DCOUNTED_BY -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=SANITIZE-WITH-ATTR %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -DCOUNTED_BY -O2 -Wall -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck --check-prefix=NO-SANITIZE-WITH-ATTR %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wall -fsanitize=array-bounds,object-size,local-bounds -fstrict-flex-arrays=3 -emit-llvm -o - %s | FileCheck 
--check-prefix=SANITIZE-WITHOUT-ATTR %s @@ -60,47 +60,47 @@ struct anon_struct { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8:[0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: ret void // // 
NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test1( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test1( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // 
NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test1(struct annotated *p, int index, int val) { @@ -109,49 +109,49 @@ void test1(struct annotated *p, int index, int val) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont6: +// SANITIZE-WITH-ATTR: [[CONT6]]: // 
SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP2]], 2 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP0]], 2 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] 
= getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test2( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test2(struct annotated *p, size_t index) { @@ -160,7 +160,7 @@ void test2(struct annotated *p, size_t index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -171,7 +171,7 @@ void test2(struct annotated *p, size_t index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) 
[[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -182,12 +182,12 @@ void test2(struct annotated *p, size_t index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test2_bdos(struct annotated *p) { @@ -196,7 +196,7 @@ size_t test2_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos_cast( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -207,7 +207,7 @@ size_t test2_bdos(struct annotated *p) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos_cast( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr 
#[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -218,12 +218,12 @@ size_t test2_bdos(struct annotated *p) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos_cast( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test2_bdos_cast( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test2_bdos_cast(struct annotated *p) { @@ -232,43 +232,43 @@ size_t test2_bdos_cast(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: 
handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test3(struct annotated *p, size_t index) { @@ -279,22 +279,22 @@ void test3(struct annotated *p, size_t index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // 
SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test3_bdos(struct annotated *p) { @@ -303,22 +303,22 @@ size_t test3_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos_cast( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos_cast( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos_cast( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos_cast( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test3_bdos_cast(struct annotated *p) { @@ -327,68 +327,68 @@ size_t test3_bdos_cast(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// 
SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 2 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 3) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont1: +// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[DOTCOUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT12:%.*]], label [[HANDLER_OUT_OF_BOUNDS8:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds8: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT12:.*]], label %[[HANDLER_OUT_OF_BOUNDS8:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS8]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize 
[[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont12: +// SANITIZE-WITH-ATTR: [[CONT12]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 244 // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = and i32 [[RESULT]], 252 // SANITIZE-WITH-ATTR-NEXT: [[CONV2:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 0 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT81:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 3 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT81]], label [[HANDLER_OUT_OF_BOUNDS18:%.*]], label [[CONT19:%.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds18: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT81]], label %[[HANDLER_OUT_OF_BOUNDS18:.*]], label %[[CONT19:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS18]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 4) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont19: +// SANITIZE-WITH-ATTR: [[CONT19]]: // SANITIZE-WITH-ATTR-NEXT: [[ADD:%.*]] = add nsw i32 [[INDEX]], 1 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM31:%.*]] = sext i32 [[ADD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = icmp ult i64 [[IDXPROM31]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP5]], label [[CONT38:%.*]], label [[HANDLER_OUT_OF_BOUNDS34:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds34: +// 
SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP5]], label %[[CONT38:.*]], label %[[HANDLER_OUT_OF_BOUNDS34:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS34]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM31]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont38: +// SANITIZE-WITH-ATTR: [[CONT38]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 3 // SANITIZE-WITH-ATTR-NEXT: [[RESULT25:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 240 // SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = and i32 [[RESULT25]], 252 // SANITIZE-WITH-ATTR-NEXT: [[CONV27:%.*]] = select i1 [[TMP6]], i32 [[TMP7]], i32 0 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM31]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV27]], ptr [[ARRAYIDX36]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV27]], ptr [[ARRAYIDX36]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM42:%.*]] = sext i32 [[FAM_IDX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD44:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP8:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD44]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM42]], [[TMP8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS45:%.*]], label [[CONT46:%.*]], !prof [[PROF8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds45: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS45:.*]], label %[[CONT46:.*]], !prof [[PROF8]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS45]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr 
nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM42]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont46: +// SANITIZE-WITH-ATTR: [[CONT46]]: // SANITIZE-WITH-ATTR-NEXT: [[ADD59:%.*]] = add nsw i32 [[INDEX]], 2 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM60:%.*]] = sext i32 [[ADD59]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP9:%.*]] = icmp ult i64 [[IDXPROM60]], [[TMP8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP9]], label [[CONT67:%.*]], label [[HANDLER_OUT_OF_BOUNDS63:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds63: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP9]], label %[[CONT67:.*]], label %[[HANDLER_OUT_OF_BOUNDS63:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS63]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM60]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont67: +// SANITIZE-WITH-ATTR: [[CONT67]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM60]] // SANITIZE-WITH-ATTR-NEXT: [[COUNT50:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD44]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP10:%.*]] = sub nsw i64 [[COUNT50]], [[IDXPROM42]] @@ -396,12 +396,12 @@ size_t test3_bdos_cast(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[DOTTR:%.*]] = trunc nuw nsw i64 [[TMP11]] to i32 // SANITIZE-WITH-ATTR-NEXT: [[CONV54:%.*]] = shl i32 [[DOTTR]], 2 // SANITIZE-WITH-ATTR-NEXT: [[CONV55:%.*]] = and i32 [[CONV54]], 252 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV55]], ptr [[ARRAYIDX65]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV55]], ptr [[ARRAYIDX65]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void 
@test4( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -412,7 +412,7 @@ size_t test3_bdos_cast(struct annotated *p) { // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV1:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 0 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV1]], ptr [[ARRAYIDX3]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV1]], ptr [[ARRAYIDX3]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD7:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE9:%.*]] = shl i32 [[COUNTED_BY_LOAD7]], 2 // NO-SANITIZE-WITH-ATTR-NEXT: [[RESULT10:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE9]], 240 @@ -420,7 +420,7 @@ size_t test3_bdos_cast(struct annotated *p) { // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = and i32 [[RESULT10]], 252 // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV12:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 0 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX15:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 4 -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV12]], ptr [[ARRAYIDX15]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV12]], ptr [[ARRAYIDX15]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM17:%.*]] = sext i32 [[FAM_IDX]] to i64 // 
NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD20:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT21:%.*]] = sext i32 [[COUNTED_BY_LOAD20]] to i64 @@ -433,33 +433,33 @@ size_t test3_bdos_cast(struct annotated *p) { // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 252 // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV26:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX30:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 8 -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV26]], ptr [[ARRAYIDX30]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV26]], ptr [[ARRAYIDX30]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX5]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i8, ptr [[ARRAYIDX5]], i64 4 -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX18]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX18]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX33:%.*]] = getelementptr i8, ptr [[ARRAYIDX5]], i64 8 -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX33]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: 
store i32 255, ptr [[ARRAYIDX33]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX3]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX3]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 4 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX10]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX19:%.*]] = getelementptr i8, ptr [[ARRAYIDX3]], i64 8 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test4(struct annotated *p, int index, int fam_idx) { @@ -471,17 +471,17 @@ void test4(struct annotated *p, int index, int fam_idx) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934589) i64 @test4_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] 
to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT1:%.*]], !prof [[PROF8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], label %[[CONT1:.*]], !prof [[PROF8]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont1: +// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = sub nsw i64 [[COUNT]], [[IDXPROM]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0) @@ -490,7 +490,7 @@ void test4(struct annotated *p, int index, int fam_idx) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load 
i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -505,12 +505,12 @@ void test4(struct annotated *p, int index, int fam_idx) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test4_bdos(struct annotated *p, int index) { @@ -519,7 +519,7 @@ size_t test4_bdos(struct annotated *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -12884901886, 12884901885) i64 @test4_bdos_cast1( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -535,7 +535,7 @@ size_t test4_bdos(struct annotated *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -12884901886, 12884901885) i64 @test4_bdos_cast1( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = 
getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -551,12 +551,12 @@ size_t test4_bdos(struct annotated *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast1( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast1( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test4_bdos_cast1(struct annotated *p, int index) { @@ -565,7 +565,7 @@ size_t test4_bdos_cast1(struct annotated *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -10737418239, 10737418237) i64 @test4_bdos_cast2( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -580,7 +580,7 @@ size_t test4_bdos_cast1(struct annotated *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -10737418239, 10737418237) i64 @test4_bdos_cast2( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // 
NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -595,12 +595,12 @@ size_t test4_bdos_cast1(struct annotated *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast2( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos_cast2( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test4_bdos_cast2(struct annotated *p, int index) { @@ -609,46 +609,46 @@ size_t test4_bdos_cast2(struct annotated *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test5( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOTCOUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// 
SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test5( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test5( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr 
[[P]], i64 16 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test5( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test5(struct anon_struct *p, int index) { @@ -657,22 +657,22 @@ void test5(struct anon_struct *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr 
noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test5_bdos(struct anon_struct *p) { @@ -681,27 +681,27 @@ size_t test5_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test6( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont6: +// SANITIZE-WITH-ATTR: [[CONT6]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] // SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nuw i64 [[COUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0) // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test6( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4 @@ -710,25 +710,25 @@ size_t test5_bdos(struct anon_struct *p) { // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test6( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: 
[[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test6( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test6(struct anon_struct *p, int index) { @@ -737,7 +737,7 @@ void test6(struct anon_struct *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test6_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nuw i64 [[COUNTED_BY_LOAD]], 2 @@ -746,7 +746,7 
@@ void test6(struct anon_struct *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test6_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i64, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nuw i64 [[COUNTED_BY_LOAD]], 2 @@ -755,12 +755,12 @@ void test6(struct anon_struct *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test6_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test6_bdos(struct anon_struct *p) { @@ -769,47 +769,47 @@ size_t test6_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test7( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[DOTCOUNTED_BY_LOAD]] to 
i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont7: +// SANITIZE-WITH-ATTR: [[CONT7]]: // SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[INTS]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test7( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: ret 
void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test7( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test7( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test7(struct union_of_fams *p, int index) { @@ -818,22 +818,22 @@ void test7(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // 
NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test7_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test7_bdos(struct union_of_fams *p) { @@ -842,49 +842,49 @@ size_t test7_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test8( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT14:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof 
[[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont14: +// SANITIZE-WITH-ATTR: [[CONT14]]: // SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[INTS]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9]] +// SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test8( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test8( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// 
SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test8( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[INTS]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test8(struct union_of_fams *p, int index) { @@ -893,7 +893,7 @@ void test8(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64 @@ -901,7 
+901,7 @@ void test8(struct union_of_fams *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = zext i8 [[COUNTED_BY_LOAD]] to i64 @@ -909,12 +909,12 @@ void test8(struct union_of_fams *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test8_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test8_bdos(struct union_of_fams *p) { @@ -923,47 +923,47 @@ size_t test8_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test9( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // 
SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont7: +// SANITIZE-WITH-ATTR: [[CONT7]]: // SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[BYTES]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9]] +// SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test9( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local 
void @test9( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test9( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test9(struct union_of_fams *p, int index) { @@ -972,22 +972,22 @@ void test9(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test9_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test9_bdos( // NO-SANITIZE-WITH-ATTR-SAME: 
ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test9_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test9_bdos(struct union_of_fams *p) { @@ -996,27 +996,27 @@ size_t test9_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test10( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT14:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT14:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // 
SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont14: +// SANITIZE-WITH-ATTR: [[CONT14]]: // SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[BYTES]], i64 [[IDXPROM]] // SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i32 [[NARROW]] to i8 -// SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA9]] +// SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA9]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test10( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -1024,25 +1024,25 @@ size_t test9_bdos(struct union_of_fams *p) { // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = trunc i32 [[NARROW]] to i8 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITH-ATTR-NEXT: 
ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test10( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test10( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test10(struct union_of_fams *p, int index) { @@ -1051,7 +1051,7 @@ void test10(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr 
inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) @@ -1060,7 +1060,7 @@ void test10(struct union_of_fams *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) @@ -1069,12 +1069,12 @@ void test10(struct union_of_fams *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test10_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test10_bdos(struct union_of_fams *p) { @@ -1083,29 +1083,29 @@ size_t test10_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test11( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr 
[[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT6:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont6: +// SANITIZE-WITH-ATTR: [[CONT6]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -3 // SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[COUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[RESULT:%.*]] = add i32 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], 8 // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[TMP2]], i32 [[RESULT]], i32 0 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test11( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[INDEX:%.*]]) 
local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[COUNTED_BY_LOAD]], 2 @@ -1115,25 +1115,25 @@ size_t test10_bdos(struct union_of_fams *p) { // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test11( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test11( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// 
NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test11(struct annotated *p, int index) { @@ -1142,7 +1142,7 @@ void test11(struct annotated *p, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934584, 8589934597) i64 @test11_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT1:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -1154,7 +1154,7 @@ void test11(struct annotated *p, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934584, 8589934597) i64 @test11_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT1:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -1166,12 +1166,12 @@ void test11(struct annotated *p, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local 
i64 @test11_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test11_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test11_bdos(struct annotated *p) { @@ -1195,87 +1195,87 @@ int test12_a, test12_b; // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12( // SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR9:[0-9]+]] // SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB22:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: 
unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont: +// SANITIZE-WITH-ATTR: [[CONT]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[BAZ]], i64 [[TMP1]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] -// SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr @test12_foo, align 4 // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF8]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds4: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS4:.*]], label %[[HANDLER_TYPE_MISMATCH6:.*]], !prof [[PROF8]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS4]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.type_mismatch6: +// SANITIZE-WITH-ATTR: [[HANDLER_TYPE_MISMATCH6]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12( // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: 
entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR12:[0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[BAZ]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITH-ATTR-NEXT: br label [[FOR_COND:%.*]] -// NO-SANITIZE-WITH-ATTR: for.cond: -// NO-SANITIZE-WITH-ATTR-NEXT: br label [[FOR_COND]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: br label %[[FOR_COND:.*]] +// NO-SANITIZE-WITH-ATTR: [[FOR_COND]]: +// NO-SANITIZE-WITH-ATTR-NEXT: br label %[[FOR_COND]] // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test12( // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr 
#[[ATTR3:[0-9]+]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR7:[0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6 // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META9:![0-9]+]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META9:![0-9]+]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[TMP1]]) #[[ATTR8:[0-9]+]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont: +// SANITIZE-WITHOUT-ATTR: [[CONT]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[BAZ]], i64 [[TMP1]] -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr @test12_foo, align 4 // 
SANITIZE-WITHOUT-ATTR-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds4: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[DOTNOT]], label %[[HANDLER_OUT_OF_BOUNDS4:.*]], label %[[HANDLER_TYPE_MISMATCH6:.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS4]]: // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.type_mismatch6: +// SANITIZE-WITHOUT-ATTR: [[HANDLER_TYPE_MISMATCH6]]: // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test12( // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR10:[0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[BAZ]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[TBAA2]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label [[FOR_COND:%.*]] -// NO-SANITIZE-WITHOUT-ATTR: for.cond: -// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label [[FOR_COND]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP0]], ptr @test12_b, align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4), align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 [[TMP1]], ptr @test12_a, align 4, !tbaa [[INT_TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label %[[FOR_COND:.*]] +// NO-SANITIZE-WITHOUT-ATTR: [[FOR_COND]]: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: br label %[[FOR_COND]] // int test12(int index) { struct hang baz = test12_bar; @@ -1298,56 +1298,56 @@ struct test13_bar { // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test13( // SANITIZE-WITH-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA11:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA11:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = 
load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont5: +// SANITIZE-WITH-ATTR: [[CONT5]]: // SANITIZE-WITH-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[REVMAP]], i64 [[INDEX]] -// SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA15:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA15:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: ret i32 0 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test13( // NO-SANITIZE-WITH-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA8:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA8:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 // NO-SANITIZE-WITH-ATTR-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[REVMAP]], i64 [[INDEX]] -// NO-SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA12:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA12:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 0 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test13( // SANITIZE-WITHOUT-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA11:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA11:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 // SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont5: +// SANITIZE-WITHOUT-ATTR: [[CONT5]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr 
inbounds nuw i8, ptr [[TMP0]], i64 16 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[REVMAP]], i64 [[INDEX]] -// SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA15:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA15:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 0 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test13( // NO-SANITIZE-WITHOUT-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA8:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[_ZTS10TEST13_BARPTR_TBAA8:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[REVMAP]], i64 [[INDEX]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA12:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store ptr null, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST13_FOOPTR_TBAA12:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 0 // int test13(long index) { @@ -1362,52 +1362,52 @@ struct test14_foo { // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test14( // SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label 
%[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: ret i32 undef // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test14( // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca [[STRUCT_TEST14_FOO:%.*]], align 4 -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 4 -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BLAH:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[BLAH]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]] // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test14( // SANITIZE-WITHOUT-ATTR-SAME: 
i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont3: +// SANITIZE-WITHOUT-ATTR: [[CONT3]]: // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 undef // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test14( // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca [[STRUCT_TEST14_FOO:%.*]], align 4 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 4 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 2, ptr [[Y]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BLAH:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTCOMPOUNDLITERAL]], i64 8 // 
NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[BLAH]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]] // int test14(int idx) { @@ -1416,42 +1416,42 @@ int test14(int idx) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test15( // SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont1: +// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: ret i32 undef // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test15( // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr getelementptr 
inbounds nuw (i8, ptr @__const.test15.foo, i64 8), i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]] // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test15( // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont1: +// SANITIZE-WITHOUT-ATTR: [[CONT1]]: // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 undef // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test15( // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @__const.test15.foo, i64 8), i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]] // int test15(int idx) { @@ -1465,30 +1465,30 @@ int test15(int idx) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test19( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 680 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 1 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 2) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont1: +// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test19( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test19( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: 
[[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test19( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test19(struct annotated *p) { @@ -1498,22 +1498,22 @@ size_t test19(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test20( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test20( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test20( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test20( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test20(struct annotated *p) { @@ -1523,22 +1523,22 @@ size_t test20(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test21( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: 
entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test21( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test21( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test21( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test21(struct annotated *p) { @@ -1548,22 +1548,22 @@ size_t test21(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test22( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test22( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test22( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: 
[[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test22( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test22(struct annotated *p) { @@ -1573,22 +1573,22 @@ size_t test22(struct annotated *p) { // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test23( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test23( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test23( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test23( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test23(struct annotated *p) { @@ -1603,38 +1603,38 @@ struct tests_foo { // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test24( // SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: 
entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 40 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT4:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB33:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont4: +// SANITIZE-WITH-ATTR: [[CONT4]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84 -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP1]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test24( // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84 -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]] // // 
SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test24( // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84 -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]] // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test24( // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]] // int test24(int c, struct tests_foo *var) { @@ -1644,41 +1644,41 @@ int test24(int c, struct tests_foo *var) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test25( // SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA17:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA17:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp 
ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont5: +// SANITIZE-WITH-ATTR: [[CONT5]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44 -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test25( // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA14:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA14:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44 -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP1]] // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test25( // 
SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA17:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA17:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44 -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP1]] // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test25( // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[VAR:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA14:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[_ZTS9TESTS_FOOPTR_TBAA14:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP1]] // int test25(int c, struct tests_foo **var) { @@ -1694,47 +1694,47 @@ struct test26_foo { // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test26( // SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: 
[[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[S:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[S]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT5:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB35:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont5: +// SANITIZE-WITH-ATTR: [[CONT5]]: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test26( // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[FOO:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[C]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP0]] // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test26( // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]] // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test26( // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef readonly captures(none) [[FOO:%.*]]) local_unnamed_addr #[[ATTR6]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[C]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // 
NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP0]] // int test26(int c, struct test26_foo *foo) { @@ -1765,53 +1765,53 @@ struct test27_foo { // SANITIZE-WITH-ATTR-LABEL: define dso_local ptr @test27( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ENTRIES]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA19:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA19:![0-9]+]] 
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM4:%.*]] = sext i32 [[J]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP2]], i64 [[IDXPROM4]] // SANITIZE-WITH-ATTR-NEXT: ret ptr [[ARRAYIDX5]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local ptr @test27( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ENTRIES]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA16:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA16:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[J]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP0]], i64 [[IDXPROM1]] // NO-SANITIZE-WITH-ATTR-NEXT: ret ptr [[ARRAYIDX2]] // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local ptr @test27( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ENTRIES]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA19:![0-9]+]] 
+// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA19:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM3:%.*]] = sext i32 [[J]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP0]], i64 [[IDXPROM3]] // SANITIZE-WITHOUT-ATTR-NEXT: ret ptr [[ARRAYIDX4]] // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local ptr @test27( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR6]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ENTRIES]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA16:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS10TEST27_BARPTR_TBAA16:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[J]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_TEST27_BAR:%.*]], ptr [[TMP0]], i64 [[IDXPROM1]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret ptr [[ARRAYIDX2]] @@ -1828,59 +1828,59 @@ struct test28_foo { // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test28( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: -// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA21:![0-9]+]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA21]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 8, !tbaa [[TBAA21]] +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT17:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT17:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont17: +// SANITIZE-WITH-ATTR: [[CONT17]]: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDXPROM]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // 
SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP5]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test28( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR8]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA18:![0-9]+]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA18]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA18]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[TMP3]] // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test28( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA21:![0-9]+]] -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA21]] -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, 
!tbaa [[TBAA21]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA21]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]] -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP3]] // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test28( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR7]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA18:![0-9]+]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA18]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA18]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[_ZTS10TEST28_FOOPTR_TBAA18]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr 
[[TMP2]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 [[TMP3]] // int test28(struct test28_foo *p, int i) { @@ -1896,39 +1896,39 @@ struct annotated_struct_array { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test29( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 10 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB41:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ANN]], i64 [[TMP1]] -// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA23:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA23:![0-9]+]] // 
SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM27:%.*]] = sext i32 [[IDX2]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM27]], [[TMP3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT32:%.*]], label [[HANDLER_OUT_OF_BOUNDS28:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds28: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label %[[CONT32:.*]], label %[[HANDLER_OUT_OF_BOUNDS28:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS28]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB42:[0-9]+]], i64 [[IDXPROM27]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont32: +// SANITIZE-WITH-ATTR: [[CONT32]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IDXPROM27]] // SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.smax.i32(i32 [[COUNTED_BY_LOAD]], i32 0) // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP5]], 2 -// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX30]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX30]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test29( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr 
#[[ATTR9:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX1]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ANN]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA20:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA20:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -1936,37 +1936,37 @@ struct annotated_struct_array { // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = shl i32 [[TMP1]], 2 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM8:%.*]] = sext i32 [[IDX2]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM8]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX9]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX9]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test29( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 10 // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT21:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: 
handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT21:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont21: +// SANITIZE-WITHOUT-ATTR: [[CONT21]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[ANN]], i64 [[TMP1]] -// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA23:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA23:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM18:%.*]] = sext i32 [[IDX2]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM18]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX19]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test29( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX1]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ANN]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa 
[[TBAA20:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[_ZTS9ANNOTATEDPTR_TBAA20:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM5:%.*]] = sext i32 [[IDX2]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IDXPROM5]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test29(struct annotated_struct_array *ann, int idx1, int idx2) { @@ -1986,34 +1986,34 @@ struct test30_struct { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB44:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[PCPU_REFCNT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[PCPU_REFCNT]], i64 [[IDXPROM]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] 
+// NO-SANITIZE-WITH-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test30( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test30( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[PCPU_REFCNT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[PCPU_REFCNT]], i64 [[IDXPROM]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA6]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test30(struct test30_struct *ptr, int idx) { @@ -2030,22 +2030,22 @@ struct test31_struct { // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i32 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31( // 
NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i32 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test31( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i32 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test31( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 -1 // int test31(struct test31_struct *ptr, int idx) { @@ -2060,24 +2060,24 @@ struct annotated_with_array { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test32( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX2]], 43 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB46:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize 
[[META2]] -// SANITIZE-WITH-ATTR: cont1: +// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM4:%.*]] = sext i32 [[IDX1]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = zext i32 [[COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp ult i64 [[IDXPROM4]], [[TMP2]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP3]], label [[CONT9:%.*]], label [[HANDLER_OUT_OF_BOUNDS5:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds5: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP3]], label %[[CONT9:.*]], label %[[HANDLER_OUT_OF_BOUNDS5:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS5]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB48:[0-9]+]], i64 [[IDXPROM4]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont9: +// SANITIZE-WITH-ATTR: [[CONT9]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i64, ptr [[ARRAY]], i64 [[IDXPROM4]] // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -2087,12 +2087,12 @@ struct annotated_with_array { // SANITIZE-WITH-ATTR-NEXT: [[REASS_SUB:%.*]] = sub nsw i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], [[FIELD_OFFSET]] // SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REASS_SUB]], i64 -344) // SANITIZE-WITH-ATTR-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP5]], 344 -// SANITIZE-WITH-ATTR-NEXT: store i64 [[TMP6]], ptr [[ARRAYIDX7]], align 8, !tbaa [[TBAA25:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: store i64 
[[TMP6]], ptr [[ARRAYIDX7]], align 8, !tbaa [[LONG_TBAA25:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test32( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef captures(none) [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX2]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -2107,32 +2107,32 @@ struct annotated_with_array { // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344 // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[IDX1]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[ARRAY]], i64 [[IDXPROM1]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i64 [[TMP4]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA22:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i64 [[TMP4]], ptr [[ARRAYIDX2]], align 8, !tbaa [[LONG_TBAA22:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test32( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX2]], 43 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT7:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// 
SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB17:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont7: +// SANITIZE-WITHOUT-ATTR: [[CONT7]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344 // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM4:%.*]] = sext i32 [[IDX1]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, ptr [[ARRAY]], i64 [[IDXPROM4]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX5]], align 8, !tbaa [[TBAA25:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX5]], align 8, !tbaa [[LONG_TBAA25:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test32( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM1:%.*]] = sext i32 [[IDX1]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[ARRAY]], i64 [[IDXPROM1]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA22:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i64 -1, ptr [[ARRAYIDX2]], align 8, !tbaa [[LONG_TBAA22:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test32(struct annotated_with_array *ptr, int idx1, int idx2) { @@ -2141,14 +2141,14 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) { // 
SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 17179869521) i64 @test32_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 43 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB49:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont1: +// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -2162,7 +2162,7 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -34359738016, 34359738705) i64 @test32_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // 
NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -2178,19 +2178,19 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 43 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[INDEX]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont1: +// SANITIZE-WITHOUT-ATTR: [[CONT1]]: // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test32_bdos(struct annotated_with_array *ptr, int index) { @@ -2199,7 +2199,7 @@ size_t test32_bdos(struct annotated_with_array *ptr, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -21474836134, 21474836817) i64 @test32_bdos_cast( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 
noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -2216,7 +2216,7 @@ size_t test32_bdos(struct annotated_with_array *ptr, int index) { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -21474836134, 21474836817) i64 @test32_bdos_cast( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 @@ -2233,12 +2233,12 @@ size_t test32_bdos(struct annotated_with_array *ptr, int index) { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos_cast( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test32_bdos_cast( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test32_bdos_cast(struct annotated_with_array *ptr, int index) { @@ -2247,22 +2247,22 @@ size_t test32_bdos_cast(struct 
annotated_with_array *ptr, int index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test33( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test33( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test33( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test33( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test33(struct annotated *ptr) { @@ -2278,50 +2278,50 @@ struct multi_subscripts { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test34( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 42 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // 
SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB51:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont1: +// SANITIZE-WITH-ATTR: [[CONT1]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX2]], 43 -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS2:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds2: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS2:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS2]]: // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[IDX2]] to i64 // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB52:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test34( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test34( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = icmp ult i32 [[IDX1]], 42 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT1:%.*]], 
label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label %[[CONT1:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont1: +// SANITIZE-WITHOUT-ATTR: [[CONT1]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX2]], 43 -// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS2:%.*]], !prof [[PROF8]], !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: handler.out_of_bounds2: +// SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP2]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS2:.*]], !prof [[PROF8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR: [[HANDLER_OUT_OF_BOUNDS2]]: // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP3:%.*]] = zext i32 [[IDX2]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] -// SANITIZE-WITHOUT-ATTR: cont3: +// SANITIZE-WITHOUT-ATTR: [[CONT3]]: // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test34( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test34(struct multi_subscripts 
*ptr, int idx1, int idx2) { @@ -2330,43 +2330,43 @@ size_t test34(struct multi_subscripts *ptr, int idx1, int idx2) { // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test35( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: handler.out_of_bounds: +// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT3:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF3]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR: [[HANDLER_OUT_OF_BOUNDS]]: // SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB53:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] -// SANITIZE-WITH-ATTR: cont3: +// SANITIZE-WITH-ATTR: [[CONT3]]: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]] +// SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA4]] // SANITIZE-WITH-ATTR-NEXT: ret void // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test35( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef writeonly captures(none) 
[[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// NO-SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITH-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test35( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test35( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDEX]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] // 
NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // void test35(struct annotated *p, size_t index) { @@ -2375,22 +2375,22 @@ void test35(struct annotated *p, size_t index) { // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test35_bdos( // SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 0 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test35_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR3]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret i64 0 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test35_bdos( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 0 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test35_bdos( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 0 // size_t test35_bdos(struct annotated *p) { @@ -2412,22 +2412,22 @@ struct { // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test36( // SANITIZE-WITH-ATTR-SAME: ) local_unnamed_addr #[[ATTR6:[0-9]+]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test36( // NO-SANITIZE-WITH-ATTR-SAME: ) local_unnamed_addr #[[ATTR10:[0-9]+]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: ret 
i64 -1 // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test36( // SANITIZE-WITHOUT-ATTR-SAME: ) local_unnamed_addr #[[ATTR6:[0-9]+]] { -// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test36( // NO-SANITIZE-WITHOUT-ATTR-SAME: ) local_unnamed_addr #[[ATTR9:[0-9]+]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test36() { @@ -2436,7 +2436,7 @@ size_t test36() { // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test37( // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8 // SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -2447,7 +2447,7 @@ size_t test36() { // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test37( // NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[PTR:%.*]]) local_unnamed_addr #[[ATTR2]] { -// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 @@ -2458,14 +2458,113 @@ size_t test36() { // // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test37( // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { -// 
SANITIZE-WITHOUT-ATTR-NEXT: entry: +// SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test37( // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR1]] { -// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 // size_t test37(struct annotated *ptr) { return __builtin_dynamic_object_size((1, 2, (4, 5, (7, 8, 9, (10, ptr->array)))), 1); } +//. +// SANITIZE-WITH-ATTR: [[META2]] = !{} +// SANITIZE-WITH-ATTR: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} +// SANITIZE-WITH-ATTR: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// SANITIZE-WITH-ATTR: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META7]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITH-ATTR: [[PROF8]] = !{!"branch_weights", i32 1, i32 1048575} +// SANITIZE-WITH-ATTR: [[CHAR_TBAA9]] = !{[[META6]], [[META6]], i64 0} +// SANITIZE-WITH-ATTR: [[TBAA_STRUCT10]] = !{i64 0, i64 24, [[CHAR_TBAA9]]} +// SANITIZE-WITH-ATTR: [[_ZTS10TEST13_BARPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META12]] = !{!"test13_foo", [[META13]], i64 0} +// SANITIZE-WITH-ATTR: [[META13]] = !{!"p1 _ZTS10test13_bar", [[META14:![0-9]+]], i64 0} +// SANITIZE-WITH-ATTR: [[META14]] = !{!"any pointer", [[META6]], i64 0} +// SANITIZE-WITH-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// SANITIZE-WITH-ATTR: [[META16]] = !{!"p1 _ZTS10test13_foo", [[META14]], i64 0} +// SANITIZE-WITH-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// SANITIZE-WITH-ATTR: [[META18]] = !{!"p1 _ZTS9tests_foo", [[META14]], i64 0} +// SANITIZE-WITH-ATTR: [[_ZTS10TEST27_BARPTR_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} +// 
SANITIZE-WITH-ATTR: [[META20]] = !{!"p1 _ZTS10test27_bar", [[META14]], i64 0} +// SANITIZE-WITH-ATTR: [[_ZTS10TEST28_FOOPTR_TBAA21]] = !{[[META22:![0-9]+]], [[META22]], i64 0} +// SANITIZE-WITH-ATTR: [[META22]] = !{!"p1 _ZTS10test28_foo", [[META14]], i64 0} +// SANITIZE-WITH-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0} +// SANITIZE-WITH-ATTR: [[META24]] = !{!"p1 _ZTS9annotated", [[META14]], i64 0} +// SANITIZE-WITH-ATTR: [[LONG_TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0} +// SANITIZE-WITH-ATTR: [[META26]] = !{!"long", [[META6]], i64 0} +//. +// NO-SANITIZE-WITH-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// NO-SANITIZE-WITH-ATTR: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[TBAA_STRUCT7]] = !{i64 0, i64 24, [[CHAR_TBAA6]]} +// NO-SANITIZE-WITH-ATTR: [[_ZTS10TEST13_BARPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META10:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META9]] = !{!"test13_foo", [[META10]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META10]] = !{!"p1 _ZTS10test13_bar", [[META11:![0-9]+]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META11]] = !{!"any pointer", [[META4]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META13]] = !{!"p1 _ZTS10test13_foo", [[META11]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META15]] = !{!"p1 _ZTS9tests_foo", [[META11]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[_ZTS10TEST27_BARPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META17]] = !{!"p1 _ZTS10test27_bar", [[META11]], i64 0} +// NO-SANITIZE-WITH-ATTR: 
[[_ZTS10TEST28_FOOPTR_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META19]] = !{!"p1 _ZTS10test28_foo", [[META11]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META21]] = !{!"p1 _ZTS9annotated", [[META11]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[LONG_TBAA22]] = !{[[META23:![0-9]+]], [[META23]], i64 0} +// NO-SANITIZE-WITH-ATTR: [[META23]] = !{!"long", [[META4]], i64 0} +//. +// SANITIZE-WITHOUT-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// SANITIZE-WITHOUT-ATTR: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[TBAA_STRUCT7]] = !{i64 0, i64 24, [[CHAR_TBAA6]]} +// SANITIZE-WITHOUT-ATTR: [[PROF8]] = !{!"branch_weights", i32 1048575, i32 1} +// SANITIZE-WITHOUT-ATTR: [[META9]] = !{} +// SANITIZE-WITHOUT-ATTR: [[PROF10]] = !{!"branch_weights", i32 1, i32 1048575} +// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_BARPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META12]] = !{!"test13_foo", [[META13]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META13]] = !{!"p1 _ZTS10test13_bar", [[META14:![0-9]+]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META14]] = !{!"any pointer", [[META4]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META16]] = !{!"p1 _ZTS10test13_foo", [[META14]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META18]] = !{!"p1 _ZTS9tests_foo", [[META14]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST27_BARPTR_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} +// 
SANITIZE-WITHOUT-ATTR: [[META20]] = !{!"p1 _ZTS10test27_bar", [[META14]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST28_FOOPTR_TBAA21]] = !{[[META22:![0-9]+]], [[META22]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META22]] = !{!"p1 _ZTS10test28_foo", [[META14]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META24]] = !{!"p1 _ZTS9annotated", [[META14]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[LONG_TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0} +// SANITIZE-WITHOUT-ATTR: [[META26]] = !{!"long", [[META4]], i64 0} +//. +// NO-SANITIZE-WITHOUT-ATTR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META5]] = !{!"Simple C/C++ TBAA"} +// NO-SANITIZE-WITHOUT-ATTR: [[CHAR_TBAA6]] = !{[[META4]], [[META4]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[TBAA_STRUCT7]] = !{i64 0, i64 24, [[CHAR_TBAA6]]} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_BARPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META10:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META9]] = !{!"test13_foo", [[META10]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META10]] = !{!"p1 _ZTS10test13_bar", [[META11:![0-9]+]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META11]] = !{!"any pointer", [[META4]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST13_FOOPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META13]] = !{!"p1 _ZTS10test13_foo", [[META11]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS9TESTS_FOOPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META15]] = !{!"p1 _ZTS9tests_foo", [[META11]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST27_BARPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META17]] = !{!"p1 
_ZTS10test27_bar", [[META11]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS10TEST28_FOOPTR_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META19]] = !{!"p1 _ZTS10test28_foo", [[META11]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[_ZTS9ANNOTATEDPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META21]] = !{!"p1 _ZTS9annotated", [[META11]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[LONG_TBAA22]] = !{[[META23:![0-9]+]], [[META23]], i64 0} +// NO-SANITIZE-WITHOUT-ATTR: [[META23]] = !{!"long", [[META4]], i64 0} +//. diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c index 69cec72495d30..2455f3b616ce7 100644 --- a/clang/test/CodeGen/builtin-maxnum-minnum.c +++ b/clang/test/CodeGen/builtin-maxnum-minnum.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -x c++ -std=c++20 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK typedef _Float16 half8 __attribute__((ext_vector_type(8))); @@ -12,10 +12,10 @@ typedef long double ldouble2 __attribute__((ext_vector_type(2))); // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]] // @@ -27,10 +27,10 @@ half8 pfmin16(half8 a, half8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MINNUM]] // @@ -42,10 +42,10 @@ bf16x8 pfmin16b(bf16x8 a, bf16x8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], 
align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]] // @@ -57,10 +57,10 @@ float4 pfmin32(float4 a, float4 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]] // @@ -72,12 +72,12 @@ double2 pfmin64(double2 a, double2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[B:%.*]] = 
load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] // @@ -90,10 +90,10 @@ ldouble2 pfmin80(ldouble2 a, ldouble2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// 
CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]] // @@ -105,10 +105,10 @@ half8 pfmax16(half8 a, half8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXNUM]] // @@ -120,10 +120,10 @@ bf16x8 pfmax16b(bf16x8 a, bf16x8 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa 
[[CHAR_TBAA2]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]] // @@ -135,10 +135,10 @@ float4 pfmax32(float4 a, float4 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]] // @@ -151,12 +151,12 @@ double2 pfmax64(double2 a, double2 b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> 
[[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] // @@ -165,7 +165,7 @@ ldouble2 pfmax80(ldouble2 a, ldouble2 b) { } //. -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} // CHECK: [[META4]] = !{!"Simple C++ TBAA"} //. 
diff --git a/clang/test/CodeGen/cleanup-destslot-simple.c b/clang/test/CodeGen/cleanup-destslot-simple.c index 8ace33254723c..23a70d4a7da25 100644 --- a/clang/test/CodeGen/cleanup-destslot-simple.c +++ b/clang/test/CodeGen/cleanup-destslot-simple.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s --check-prefix=CHECK-LIFETIME // We shouldn't have markers at -O0 or with msan. @@ -9,22 +9,24 @@ // There is no exception to handle here, lifetime.end is not a destructor, // so there is no need have cleanup dest slot related code -// CHECK-LIFETIME-LABEL: @test( -// CHECK-LIFETIME-NEXT: entry: +// CHECK-LIFETIME-LABEL: define dso_local i32 @test( +// CHECK-LIFETIME-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] { +// CHECK-LIFETIME-NEXT: [[ENTRY:.*:]] // CHECK-LIFETIME-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK-LIFETIME-NEXT: [[P:%.*]] = alloca ptr, align 8 // CHECK-LIFETIME-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2:[0-9]+]], !dbg [[DBG9:![0-9]+]] -// CHECK-LIFETIME-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10:![0-9]+]], !tbaa [[TBAA11:![0-9]+]] +// CHECK-LIFETIME-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10:![0-9]+]], !tbaa [[INT_TBAA11:![0-9]+]] // CHECK-LIFETIME-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG15:![0-9]+]] -// CHECK-LIFETIME-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16:![0-9]+]], !tbaa [[TBAA17:![0-9]+]] -// CHECK-LIFETIME-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]] -// CHECK-LIFETIME-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20:![0-9]+]], !tbaa [[TBAA11]] -// CHECK-LIFETIME-NEXT: 
call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG21:![0-9]+]] -// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG21]] -// CHECK-LIFETIME-NEXT: ret i32 [[TMP0]], !dbg [[DBG22:![0-9]+]] +// CHECK-LIFETIME-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16:![0-9]+]], !tbaa [[INTPTR_TBAA17:![0-9]+]] +// CHECK-LIFETIME-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG20:![0-9]+]], !tbaa [[INTPTR_TBAA17]] +// CHECK-LIFETIME-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG21:![0-9]+]], !tbaa [[INT_TBAA11]] +// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG22:![0-9]+]] +// CHECK-LIFETIME-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG22]] +// CHECK-LIFETIME-NEXT: ret i32 [[TMP0]], !dbg [[DBG23:![0-9]+]] // -// CHECK-OPTNONE-LABEL: @test( -// CHECK-OPTNONE-NEXT: entry: +// CHECK-OPTNONE-LABEL: define dso_local i32 @test( +// CHECK-OPTNONE-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] { +// CHECK-OPTNONE-NEXT: [[ENTRY:.*:]] // CHECK-OPTNONE-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK-OPTNONE-NEXT: [[P:%.*]] = alloca ptr, align 8 // CHECK-OPTNONE-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG9:![0-9]+]] @@ -33,87 +35,89 @@ // CHECK-OPTNONE-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG12:![0-9]+]] // CHECK-OPTNONE-NEXT: ret i32 [[TMP1]], !dbg [[DBG13:![0-9]+]] // -// CHECK-MSAN-LABEL: @test( -// CHECK-MSAN-NEXT: entry: +// CHECK-MSAN-LABEL: define dso_local noundef i32 @test( +// CHECK-MSAN-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { +// CHECK-MSAN-NEXT: [[ENTRY:.*:]] // CHECK-MSAN-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK-MSAN-NEXT: [[P:%.*]] = alloca ptr, align 8 -// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2:[0-9]+]], !dbg [[DBG9:![0-9]+]] -// CHECK-MSAN-NEXT: [[TMP0:%.*]] = 
ptrtoint ptr [[X]] to i64, !dbg [[DBG9]] -// CHECK-MSAN-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080, !dbg [[DBG9]] -// CHECK-MSAN-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr, !dbg [[DBG9]] -// CHECK-MSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG10:![0-9]+]] -// CHECK-MSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10]], !tbaa [[TBAA11:![0-9]+]] -// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG15:![0-9]+]] -// CHECK-MSAN-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64, !dbg [[DBG15]] -// CHECK-MSAN-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080, !dbg [[DBG15]] -// CHECK-MSAN-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr, !dbg [[DBG15]] -// CHECK-MSAN-NEXT: store i64 0, ptr [[TMP5]], align 8, !dbg [[DBG16:![0-9]+]] -// CHECK-MSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16]], !tbaa [[TBAA17:![0-9]+]] -// CHECK-MSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]] -// CHECK-MSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP5]], align 8, !dbg [[DBG19]] -// CHECK-MSAN-NEXT: [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG20:![0-9]+]] -// CHECK-MSAN-NEXT: br i1 [[_MSCMP_NOT]], label [[TMP7:%.*]], label [[TMP6:%.*]], !dbg [[DBG20]], !prof [[PROF21:![0-9]+]] -// CHECK-MSAN: 6: -// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]], !dbg [[DBG20]] -// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG20]] -// CHECK-MSAN: 7: -// CHECK-MSAN-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]] -// CHECK-MSAN-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[P_0_P_0_P_0_P_0_]] to i64, !dbg [[DBG20]] -// CHECK-MSAN-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080, !dbg [[DBG20]] -// CHECK-MSAN-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr, !dbg [[DBG20]] -// CHECK-MSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG20]] -// 
CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG22:![0-9]+]] -// CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG22]] -// CHECK-MSAN-NEXT: [[_MSCMP2_NOT:%.*]] = icmp eq i32 [[_MSLD1]], 0, !dbg [[DBG23:![0-9]+]] -// CHECK-MSAN-NEXT: br i1 [[_MSCMP2_NOT]], label [[TMP13:%.*]], label [[TMP12:%.*]], !dbg [[DBG23]], !prof [[PROF21]] -// CHECK-MSAN: 12: -// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]], !dbg [[DBG23]] -// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG23]] -// CHECK-MSAN: 13: -// CHECK-MSAN-NEXT: ret i32 [[TMP8]], !dbg [[DBG23]] +// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR3:[0-9]+]], !dbg [[DBG10:![0-9]+]] +// CHECK-MSAN-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG10]] +// CHECK-MSAN-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080, !dbg [[DBG10]] +// CHECK-MSAN-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr, !dbg [[DBG10]] +// CHECK-MSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG11:![0-9]+]] +// CHECK-MSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG11]], !tbaa [[INT_TBAA12:![0-9]+]] +// CHECK-MSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG16:![0-9]+]] +// CHECK-MSAN-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64, !dbg [[DBG16]] +// CHECK-MSAN-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080, !dbg [[DBG16]] +// CHECK-MSAN-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr, !dbg [[DBG16]] +// CHECK-MSAN-NEXT: store i64 0, ptr [[TMP5]], align 8, !dbg [[DBG17:![0-9]+]] +// CHECK-MSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG17]], !tbaa [[INTPTR_TBAA18:![0-9]+]] +// CHECK-MSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG21:![0-9]+]], !tbaa [[INTPTR_TBAA18]] +// CHECK-MSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP5]], align 8, !dbg [[DBG21]] +// CHECK-MSAN-NEXT: [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg 
[[DBG22:![0-9]+]] +// CHECK-MSAN-NEXT: br i1 [[_MSCMP_NOT]], label %[[BB7:.*]], label %[[BB6:.*]], !dbg [[DBG22]], !prof [[PROF23:![0-9]+]] +// CHECK-MSAN: [[BB6]]: +// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]], !dbg [[DBG22]] +// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG22]] +// CHECK-MSAN: [[BB7]]: +// CHECK-MSAN-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG22]], !tbaa [[INT_TBAA12]] +// CHECK-MSAN-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[P_0_P_0_P_0_P_0_]] to i64, !dbg [[DBG22]] +// CHECK-MSAN-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080, !dbg [[DBG22]] +// CHECK-MSAN-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr, !dbg [[DBG22]] +// CHECK-MSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG22]] +// CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG24:![0-9]+]] +// CHECK-MSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR3]], !dbg [[DBG24]] +// CHECK-MSAN-NEXT: [[_MSCMP2_NOT:%.*]] = icmp eq i32 [[_MSLD1]], 0, !dbg [[DBG25:![0-9]+]] +// CHECK-MSAN-NEXT: br i1 [[_MSCMP2_NOT]], label %[[BB13:.*]], label %[[BB12:.*]], !dbg [[DBG25]], !prof [[PROF23]] +// CHECK-MSAN: [[BB12]]: +// CHECK-MSAN-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]], !dbg [[DBG25]] +// CHECK-MSAN-NEXT: unreachable, !dbg [[DBG25]] +// CHECK-MSAN: [[BB13]]: +// CHECK-MSAN-NEXT: ret i32 [[TMP8]], !dbg [[DBG25]] // -// CHECK-KMSAN-LABEL: @test( -// CHECK-KMSAN-NEXT: entry: +// CHECK-KMSAN-LABEL: define dso_local i32 @test( +// CHECK-KMSAN-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { +// CHECK-KMSAN-NEXT: [[ENTRY:.*:]] // CHECK-KMSAN-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() #[[ATTR2:[0-9]+]] // CHECK-KMSAN-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK-KMSAN-NEXT: [[P:%.*]] = alloca ptr, align 8 -// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG9:![0-9]+]] -// 
CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[X]], i64 4, ptr nonnull @[[GLOB0:[0-9]+]]) #[[ATTR2]], !dbg [[DBG9]] -// CHECK-KMSAN-NEXT: [[TMP1:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG10:![0-9]+]] -// CHECK-KMSAN-NEXT: [[TMP2:%.*]] = extractvalue { ptr, ptr } [[TMP1]], 0, !dbg [[DBG10]] -// CHECK-KMSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG10]] -// CHECK-KMSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG10]], !tbaa [[TBAA11:![0-9]+]] -// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG15:![0-9]+]] -// CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[P]], i64 8, ptr nonnull @[[GLOB1:[0-9]+]]) #[[ATTR2]], !dbg [[DBG15]] -// CHECK-KMSAN-NEXT: [[TMP3:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG16:![0-9]+]] -// CHECK-KMSAN-NEXT: [[TMP4:%.*]] = extractvalue { ptr, ptr } [[TMP3]], 0, !dbg [[DBG16]] -// CHECK-KMSAN-NEXT: store i64 0, ptr [[TMP4]], align 8, !dbg [[DBG16]] -// CHECK-KMSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16]], !tbaa [[TBAA17:![0-9]+]] -// CHECK-KMSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]] -// CHECK-KMSAN-NEXT: [[TMP5:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG19]] -// CHECK-KMSAN-NEXT: [[TMP6:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 0, !dbg [[DBG19]] -// CHECK-KMSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8, !dbg [[DBG19]] -// CHECK-KMSAN-NEXT: [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG20:![0-9]+]] -// CHECK-KMSAN-NEXT: br i1 [[_MSCMP_NOT]], label [[TMP10:%.*]], label [[TMP7:%.*]], !dbg [[DBG20]], !prof [[PROF21:![0-9]+]] -// CHECK-KMSAN: 7: -// CHECK-KMSAN-NEXT: [[TMP8:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 1, !dbg [[DBG19]] -// CHECK-KMSAN-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[TMP8]], align 8, !dbg [[DBG19]] -// CHECK-KMSAN-NEXT: call void @__msan_warning(i32 [[TMP9]]) #[[ATTR3:[0-9]+]], !dbg [[DBG20]] -// CHECK-KMSAN-NEXT: br label [[TMP10]], !dbg [[DBG20]] -// CHECK-KMSAN: 10: +// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG10:![0-9]+]] +// CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[X]], i64 4, ptr nonnull @[[GLOB0:[0-9]+]]) #[[ATTR2]], !dbg [[DBG10]] +// CHECK-KMSAN-NEXT: [[TMP1:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG11:![0-9]+]] +// CHECK-KMSAN-NEXT: [[TMP2:%.*]] = extractvalue { ptr, ptr } [[TMP1]], 0, !dbg [[DBG11]] +// CHECK-KMSAN-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG11]] +// CHECK-KMSAN-NEXT: store i32 3, ptr [[X]], align 4, !dbg [[DBG11]], !tbaa [[INT_TBAA12:![0-9]+]] +// CHECK-KMSAN-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[P]]), !dbg [[DBG16:![0-9]+]] +// CHECK-KMSAN-NEXT: call void @__msan_poison_alloca(ptr nonnull [[P]], i64 8, ptr nonnull @[[GLOB1:[0-9]+]]) #[[ATTR2]], !dbg [[DBG16]] +// CHECK-KMSAN-NEXT: [[TMP3:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG17:![0-9]+]] +// CHECK-KMSAN-NEXT: [[TMP4:%.*]] = extractvalue { ptr, ptr } [[TMP3]], 0, !dbg [[DBG17]] +// CHECK-KMSAN-NEXT: store i64 0, ptr [[TMP4]], align 8, !dbg [[DBG17]] +// CHECK-KMSAN-NEXT: store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG17]], !tbaa [[INTPTR_TBAA18:![0-9]+]] +// CHECK-KMSAN-NEXT: [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG21:![0-9]+]], !tbaa [[INTPTR_TBAA18]] +// CHECK-KMSAN-NEXT: [[TMP5:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG21]] +// CHECK-KMSAN-NEXT: [[TMP6:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 0, !dbg [[DBG21]] +// CHECK-KMSAN-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8, !dbg [[DBG21]] +// CHECK-KMSAN-NEXT: 
[[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG22:![0-9]+]] +// CHECK-KMSAN-NEXT: br i1 [[_MSCMP_NOT]], label %[[BB10:.*]], label %[[BB7:.*]], !dbg [[DBG22]], !prof [[PROF23:![0-9]+]] +// CHECK-KMSAN: [[BB7]]: +// CHECK-KMSAN-NEXT: [[TMP8:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 1, !dbg [[DBG21]] +// CHECK-KMSAN-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8, !dbg [[DBG21]] +// CHECK-KMSAN-NEXT: call void @__msan_warning(i32 [[TMP9]]) #[[ATTR3:[0-9]+]], !dbg [[DBG22]] +// CHECK-KMSAN-NEXT: br label %[[BB10]], !dbg [[DBG22]] +// CHECK-KMSAN: [[BB10]]: // CHECK-KMSAN-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4008 // CHECK-KMSAN-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr i8, ptr [[TMP0]], i64 800 -// CHECK-KMSAN-NEXT: [[TMP11:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]] -// CHECK-KMSAN-NEXT: [[TMP12:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr nonnull [[P_0_P_0_P_0_P_0_]]) #[[ATTR2]], !dbg [[DBG20]] -// CHECK-KMSAN-NEXT: [[TMP13:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 0, !dbg [[DBG20]] -// CHECK-KMSAN-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 1, !dbg [[DBG20]] -// CHECK-KMSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG20]] -// CHECK-KMSAN-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG20]] -// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG22:![0-9]+]] -// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG22]] -// CHECK-KMSAN-NEXT: store i32 [[_MSLD1]], ptr [[RETVAL_SHADOW]], align 8, !dbg [[DBG23:![0-9]+]] -// CHECK-KMSAN-NEXT: store i32 [[TMP15]], ptr [[RETVAL_ORIGIN]], align 4, !dbg [[DBG23]] -// CHECK-KMSAN-NEXT: ret i32 [[TMP11]], !dbg [[DBG23]] +// CHECK-KMSAN-NEXT: [[TMP11:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG22]], !tbaa [[INT_TBAA12]] +// CHECK-KMSAN-NEXT: [[TMP12:%.*]] = call { ptr, ptr } 
@__msan_metadata_ptr_for_load_4(ptr nonnull [[P_0_P_0_P_0_P_0_]]) #[[ATTR2]], !dbg [[DBG22]] +// CHECK-KMSAN-NEXT: [[TMP13:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 0, !dbg [[DBG22]] +// CHECK-KMSAN-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 1, !dbg [[DBG22]] +// CHECK-KMSAN-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG22]] +// CHECK-KMSAN-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG22]] +// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[P]]), !dbg [[DBG24:![0-9]+]] +// CHECK-KMSAN-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG24]] +// CHECK-KMSAN-NEXT: store i32 [[_MSLD1]], ptr [[RETVAL_SHADOW]], align 8, !dbg [[DBG25:![0-9]+]] +// CHECK-KMSAN-NEXT: store i32 [[TMP15]], ptr [[RETVAL_ORIGIN]], align 4, !dbg [[DBG25]] +// CHECK-KMSAN-NEXT: ret i32 [[TMP11]], !dbg [[DBG25]] // int test(void) { int x = 3; diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c index ee3a22b40fefd..8a631c471c329 100644 --- a/clang/test/CodeGen/isfpclass.c +++ b/clang/test/CodeGen/isfpclass.c @@ -1,9 +1,9 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple aarch64-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) // CHECK-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000 // CHECK-NEXT: ret i1 [[TMP1]] @@ -12,9 +12,9 @@ _Bool 
check_isfpclass_finite(float x) { return __builtin_isfpclass(x, 504 /*Finite*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite_strict -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite_strict( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) #[[ATTR5:[0-9]+]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -23,9 +23,9 @@ _Bool check_isfpclass_finite_strict(float x) { return __builtin_isfpclass(x, 504 /*Finite*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32 -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // CHECK-NEXT: ret i1 [[TMP0]] // @@ -33,9 +33,9 @@ _Bool check_isfpclass_nan_f32(float x) { return __builtin_isfpclass(x, 3 /*NaN*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32_strict -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32_strict( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) #[[ATTR5]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -44,9 +44,9 @@ _Bool check_isfpclass_nan_f32_strict(float x) { return __builtin_isfpclass(x, 3 /*NaN*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64 -// CHECK-SAME: (double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { -// 
CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64( +// CHECK-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f64(double [[X]], i32 1) // CHECK-NEXT: ret i1 [[TMP0]] // @@ -54,9 +54,9 @@ _Bool check_isfpclass_snan_f64(double x) { return __builtin_isfpclass(x, 1 /*SNaN*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64_strict -// CHECK-SAME: (double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64_strict( +// CHECK-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f64(double [[X]], i32 1) #[[ATTR5]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -65,9 +65,9 @@ _Bool check_isfpclass_snan_f64_strict(double x) { return __builtin_isfpclass(x, 1 /*NaN*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16 -// CHECK-SAME: (half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16( +// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq half [[X]], 0xH0000 // CHECK-NEXT: ret i1 [[TMP0]] // @@ -75,9 +75,9 @@ _Bool check_isfpclass_zero_f16(_Float16 x) { return __builtin_isfpclass(x, 96 /*Zero*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16_strict -// CHECK-SAME: (half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16_strict( +// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f16(half [[X]], i32 
96) #[[ATTR5]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -86,9 +86,9 @@ _Bool check_isfpclass_zero_f16_strict(_Float16 x) { return __builtin_isfpclass(x, 96 /*Zero*/); } -// CHECK-LABEL: define dso_local noundef i1 @check_isnan -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isnan( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) #[[ATTR5]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -97,9 +97,9 @@ _Bool check_isnan(float x) { return __builtin_isnan(x); } -// CHECK-LABEL: define dso_local noundef i1 @check_isinf -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isinf( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 516) #[[ATTR5]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -108,9 +108,9 @@ _Bool check_isinf(float x) { return __builtin_isinf(x); } -// CHECK-LABEL: define dso_local noundef i1 @check_isfinite -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isfinite( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) #[[ATTR5]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -119,9 +119,9 @@ _Bool check_isfinite(float x) { return __builtin_isfinite(x); } -// CHECK-LABEL: define dso_local noundef i1 @check_isnormal -// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef i1 @check_isnormal( +// CHECK-SAME: float 
noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 264) #[[ATTR5]] // CHECK-NEXT: ret i1 [[TMP0]] // @@ -136,9 +136,9 @@ typedef double __attribute__((ext_vector_type(4))) double4; typedef int __attribute__((ext_vector_type(4))) int4; typedef long __attribute__((ext_vector_type(4))) long4; -// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32 -// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32( +// CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -147,9 +147,9 @@ int4 check_isfpclass_nan_v4f32(float4 x) { return __builtin_isfpclass(x, 3 /*NaN*/); } -// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32 -// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32( +// CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]] // CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -159,15 +159,20 @@ int4 check_isfpclass_nan_strict_v4f32(float4 x) { return __builtin_isfpclass(x, 3 /*NaN*/); } -// CHECK-LABEL: define dso_local void @check_isfpclass_nan_v4f64 -// CHECK-SAME: (ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 16 captures(none) initializes((0, 32)) 
[[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[TMP0]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define dso_local void @check_isfpclass_nan_v4f64( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 16 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[TMP0]], align 16, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <4 x double> [[X]], zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i64> -// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // long4 check_isfpclass_nan_v4f64(double4 x) { return __builtin_isfpclass(x, 3 /*NaN*/); } +//. +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c index 0124cc5c06d43..20a31003fe915 100644 --- a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c +++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "(@powl|@cargl|@ilogbl|!|load|store)" --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "(@powl|@cargl|@ilogbl|!|load|store)" --version 6 // RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK // RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s -check-prefixes=CHECK-WIN64 // RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-I686 @@ -18,49 +18,49 @@ long double powl(long double a, long double b); // CHECK-LABEL: define dso_local x86_fp80 @test_powl( // CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// CHECK: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]] // // CHECK-WIN64-LABEL: define dso_local x86_fp80 @test_powl( // CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]] // // CHECK-I686-LABEL: define 
dso_local x86_fp80 @test_powl( // CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]] // // CHECK-PPC-LABEL: define dso_local ppc_fp128 @test_powl( // CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]], ppc_fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @powl(ppc_fp128 noundef [[A]], ppc_fp128 noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @powl(ppc_fp128 noundef [[A]], ppc_fp128 noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]] // // CHECK-ARM-LABEL: define dso_local double @test_powl( // CHECK-ARM-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-ARM: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// CHECK-ARM: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]] // // CHECK-ARM-HF-LABEL: define dso_local double @test_powl( // CHECK-ARM-HF-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]] // // CHECK-THUMB-LABEL: define double @test_powl( // CHECK-THUMB-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr 
#[[ATTR0:[0-9]+]] { -// CHECK-THUMB: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// CHECK-THUMB: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA3:![0-9]+]] // // CHECK-AARCH-LABEL: define dso_local fp128 @test_powl( // CHECK-AARCH-SAME: fp128 noundef [[A:%.*]], fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @powl(fp128 noundef [[A]], fp128 noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @powl(fp128 noundef [[A]], fp128 noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]] // // CHECK-SPIR-LABEL: define dso_local spir_func double @test_powl( // CHECK-SPIR-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR4:[0-9]+]], !tbaa [[INT_TBAA2:![0-9]+]] // // CHECK-MINGW32-LABEL: define dso_local void @test_powl( // CHECK-MINGW32-SAME: ptr dead_on_unwind noalias writable writeonly sret(x86_fp80) align 16 captures(none) initializes((0, 10)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA6:![0-9]+]] -// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[TBAA6]] +// 
CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[LONG_DOUBLE_TBAA6:![0-9]+]] +// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] // CHECK-MINGW32: call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3:[0-9]+]] -// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA6]] +// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] // long double test_powl(long double a, long double b) { return powl(a, b); @@ -104,21 +104,21 @@ long double test_powl(long double a, long double b) { // // CHECK-ARM-LABEL: define dso_local void @test_cargl( // CHECK-ARM-SAME: ptr dead_on_unwind noalias writable writeonly sret({ double, double }) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], [2 x i64] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA3]] +// CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA3]] // CHECK-ARM: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8 // CHECK-ARM: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8 // // CHECK-ARM-HF-LABEL: define dso_local { double, double } @test_cargl( // CHECK-ARM-HF-SAME: { double, double } noundef [[CLD_COERCE:%.*]]) 
local_unnamed_addr #[[ATTR2:[0-9]+]] { -// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @cargl({ double, double } noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA3]] +// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @cargl({ double, double } noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA3]] // // CHECK-THUMB-LABEL: define { double, double } @test_cargl( // CHECK-THUMB-SAME: [2 x double] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// CHECK-THUMB: [[CALL:%.*]] = tail call double @cargl([2 x double] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA3]] +// CHECK-THUMB: [[CALL:%.*]] = tail call double @cargl([2 x double] noundef [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA3]] // // CHECK-AARCH-LABEL: define dso_local { fp128, fp128 } @test_cargl( // CHECK-AARCH-SAME: [2 x fp128] noundef alignstack(16) [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[TBAA2]] +// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR3]], !tbaa [[INT_TBAA2]] // // CHECK-SPIR-LABEL: define dso_local spir_func void @test_cargl( // CHECK-SPIR-SAME: ptr dead_on_unwind noalias writable writeonly sret({ double, double }) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval({ double, double }) align 8 captures(none) [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { @@ -137,7 +137,7 @@ long double test_powl(long double a, long double b) { // CHECK-MINGW32: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 // CHECK-MINGW32: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 // CHECK-MINGW32: call void @cargl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr dead_on_return noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] -// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, 
!tbaa [[TBAA6]] +// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] // CHECK-MINGW32: [[CLD_REAL3:%.*]] = load x86_fp80, ptr [[CLD]], align 16 // CHECK-MINGW32: [[CLD_IMAG5:%.*]] = load x86_fp80, ptr [[CLD_IMAGP]], align 16 // CHECK-MINGW32: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16 @@ -154,96 +154,96 @@ int ilogbl(long double a); // CHECK-LABEL: define dso_local i32 @test_ilogb( // CHECK-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[INT_TBAA2]] // // CHECK-WIN64-LABEL: define dso_local i32 @test_ilogb( // CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[INT_TBAA2]] // // CHECK-I686-LABEL: define dso_local i32 @test_ilogb( // CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA3]] +// CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[INT_TBAA3]] // // CHECK-PPC-LABEL: define dso_local i32 @test_ilogb( // CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR4]], !tbaa [[TBAA2]] +// CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR4]], !tbaa [[INT_TBAA2]] // // CHECK-ARM-LABEL: define dso_local i32 @test_ilogb( // CHECK-ARM-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) 
#[[ATTR3]], !tbaa [[TBAA3]] +// CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA3]] // // CHECK-ARM-HF-LABEL: define dso_local i32 @test_ilogb( // CHECK-ARM-HF-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA3]] +// CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA3]] // // CHECK-THUMB-LABEL: define i32 @test_ilogb( // CHECK-THUMB-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA3]] +// CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA3]] // // CHECK-AARCH-LABEL: define dso_local i32 @test_ilogb( // CHECK-AARCH-SAME: fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] +// CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[INT_TBAA2]] // // CHECK-SPIR-LABEL: define dso_local spir_func i32 @test_ilogb( // CHECK-SPIR-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR4]], !tbaa [[TBAA2]] +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR4]], !tbaa [[INT_TBAA2]] // // CHECK-MINGW32-LABEL: define dso_local i32 @test_ilogb( // CHECK-MINGW32-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA6]] -// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA6]] +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa 
[[LONG_DOUBLE_TBAA6]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[LONG_DOUBLE_TBAA6]] // CHECK-MINGW32: [[CALL:%.*]] = call i32 @ilogbl(ptr dead_on_return noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] // int test_ilogb(long double a) { return ilogbl(a); } //. -// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-WIN64: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-WIN64: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK-WIN64: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // CHECK-WIN64: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-WIN64: [[META5]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-I686: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-I686: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // CHECK-I686: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} // CHECK-I686: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // CHECK-I686: [[META6]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-PPC: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-PPC: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK-PPC: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // CHECK-PPC: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-PPC: [[META5]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-ARM: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // CHECK-ARM: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} // CHECK-ARM: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // CHECK-ARM: [[META6]] = !{!"Simple C/C++ TBAA"} //. 
-// CHECK-ARM-HF: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM-HF: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // CHECK-ARM-HF: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} // CHECK-ARM-HF: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // CHECK-ARM-HF: [[META6]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-THUMB: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-THUMB: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // CHECK-THUMB: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} // CHECK-THUMB: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // CHECK-THUMB: [[META6]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-AARCH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-AARCH: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK-AARCH: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // CHECK-AARCH: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-AARCH: [[META5]] = !{!"Simple C/C++ TBAA"} //. -// CHECK-SPIR: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-SPIR: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK-SPIR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} // CHECK-SPIR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // CHECK-SPIR: [[META5]] = !{!"Simple C/C++ TBAA"} //. 
-// CHECK-MINGW32: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK-MINGW32: [[LONG_DOUBLE_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} // CHECK-MINGW32: [[META7]] = !{!"long double", [[META8:![0-9]+]], i64 0} // CHECK-MINGW32: [[META8]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} // CHECK-MINGW32: [[META9]] = !{!"Simple C/C++ TBAA"} diff --git a/clang/test/CodeGen/math-libcalls-tbaa.c b/clang/test/CodeGen/math-libcalls-tbaa.c index b2f502e5b4729..53ca7963b27c1 100644 --- a/clang/test/CodeGen/math-libcalls-tbaa.c +++ b/clang/test/CodeGen/math-libcalls-tbaa.c @@ -1,7 +1,7 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 -// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,NONEWSTRUCTPATHTBAA -// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,NEWSTRUCTPATHTBAA +// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes=NONEWSTRUCTPATHTBAA +// RUN: %clang_cc1 -triple=aarch64-unknown-linux-gnu -fmath-errno -O3 -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s -check-prefixes=NEWSTRUCTPATHTBAA float expf(float); double remainder(double, double); @@ -13,14 +13,23 @@ float crealf(float _Complex); // Emit int TBAA metadata on FP math libcalls, which is useful for alias analysis -// CHECK-LABEL: define dso_local float @test_expf( -// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: 
[[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[TBAA6:![0-9]+]] -// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] -// CHECK-NEXT: ret float [[MUL]] +// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_expf( +// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2:![0-9]+]] +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[INT_TBAA6:![0-9]+]] +// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] +// NONEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] +// +// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_expf( +// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]] +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[TBAA6:![0-9]+]] +// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] +// NEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] // float test_expf (float num[]) { const float expm2 = expf(num[10]); // Emit TBAA metadata on @expf @@ -28,14 +37,23 @@ float test_expf (float num[]) { return tmp; } -// CHECK-LABEL: define dso_local float @test_builtin_expf( -// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, 
ptr [[NUM]], i64 40 -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[TBAA6]] -// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] -// CHECK-NEXT: ret float [[MUL]] +// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_builtin_expf( +// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[INT_TBAA6]] +// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] +// NONEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] +// +// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_builtin_expf( +// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[TBAA6]] +// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]] +// NEWSTRUCTPATHTBAA-NEXT: ret float [[MUL]] // float test_builtin_expf (float num[]) { const float expm2 = __builtin_expf(num[10]); // Emit TBAA metadata on @expf @@ -45,14 +63,23 @@ float test_builtin_expf (float num[]) { // // Negative test: fabs cannot set errno -// CHECK-LABEL: define dso_local double @test_fabs( -// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) 
local_unnamed_addr #[[ATTR3:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.fabs.f64(double [[TMP0]]) -// CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret double [[MUL]] +// NONEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_fabs( +// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA8:![0-9]+]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = tail call double @llvm.fabs.f64(double [[TMP0]]) +// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[TMP1]] +// NONEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]] +// +// NEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_fabs( +// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8:![0-9]+]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = tail call double @llvm.fabs.f64(double [[TMP0]]) +// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[TMP1]] +// NEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]] // double test_fabs (double num[]) { const double expm2 = fabs(num[10]); // Don't emit TBAA metadata @@ -60,14 +87,23 @@ double test_fabs (double num[]) { return tmp; } -// CHECK-LABEL: define dso_local double @test_remainder( -// CHECK-SAME: ptr noundef readonly 
captures(none) [[NUM:%.*]], double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[TBAA6]] -// CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] -// CHECK-NEXT: ret double [[MUL]] +// NONEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_remainder( +// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]], double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA8]] +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[INT_TBAA6]] +// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] +// NONEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]] +// +// NEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_remainder( +// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]], double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]] +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[TBAA6]] +// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] +// NEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]] // double test_remainder 
(double num[], double a) { const double expm2 = remainder(num[10], a); // Emit TBAA metadata @@ -78,17 +114,29 @@ double test_remainder (double num[], double a) { // // TODO: frexp is not subject to any errors, but also writes to // its int pointer out argument, so it could emit int TBAA metadata. -// CHECK-LABEL: define dso_local double @test_frexp( -// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4 -// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[E]]) #[[ATTR9]] -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 16 -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: [[CALL:%.*]] = call double @frexp(double noundef [[TMP0]], ptr noundef nonnull [[E]]) #[[ATTR9]] -// CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[E]]) #[[ATTR9]] -// CHECK-NEXT: ret double [[MUL]] +// NONEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_frexp( +// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NONEWSTRUCTPATHTBAA-NEXT: [[E:%.*]] = alloca i32, align 4 +// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[E]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 16 +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA8]] +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = call double @frexp(double noundef [[TMP0]], ptr noundef nonnull [[E]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] +// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[E]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: ret 
double [[MUL]] +// +// NEWSTRUCTPATHTBAA-LABEL: define dso_local double @test_frexp( +// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NEWSTRUCTPATHTBAA-NEXT: [[E:%.*]] = alloca i32, align 4 +// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[E]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 16 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]] +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = call double @frexp(double noundef [[TMP0]], ptr noundef nonnull [[E]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]] +// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[E]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: ret double [[MUL]] // double test_frexp (double num[]) { int e; @@ -100,24 +148,43 @@ double test_frexp (double num[]) { // // Negative test: sincos is a library function, but is not a builtin function // checked in CodeGenFunction::EmitCallExpr. 
-// CHECK-LABEL: define dso_local float @test_sincos( -// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SIN:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[COS:%.*]] = alloca float, align 4 -// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]] -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]] -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], [[TMP2]] -// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]] -// CHECK-NEXT: ret float [[ADD]] +// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_sincos( +// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NONEWSTRUCTPATHTBAA-NEXT: [[SIN:%.*]] = alloca float, align 4 +// NONEWSTRUCTPATHTBAA-NEXT: [[COS:%.*]] = alloca float, align 4 +// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 +// NONEWSTRUCTPATHTBAA-NEXT: 
[[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], [[TMP2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]] +// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]] +// +// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_sincos( +// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NEWSTRUCTPATHTBAA-NEXT: [[SIN:%.*]] = alloca float, align 4 +// NEWSTRUCTPATHTBAA-NEXT: [[COS:%.*]] = alloca float, align 4 +// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[SIN]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[COS]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[COS]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: 
[[MUL:%.*]] = fmul float [[TMP1]], [[TMP2]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[TMP3]] +// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[COS]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[SIN]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]] // float test_sincos (float num[]) { float sin, cos; @@ -127,18 +194,31 @@ float test_sincos (float num[]) { } // TODO: The builtin return a complex type -// CHECK-LABEL: define dso_local float @test_cacoshf( -// CHECK-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1 -// CHECK-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]] -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]] -// CHECK-NEXT: ret float [[ADD]] +// NONEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_cacoshf( +// NONEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] { +// NONEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: 
[[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0 +// NONEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1 +// NONEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]] +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0 +// NONEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA2]] +// NONEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]] +// NONEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]] +// +// NEWSTRUCTPATHTBAA-LABEL: define dso_local float @test_cacoshf( +// NEWSTRUCTPATHTBAA-SAME: ptr noundef readonly captures(none) [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] { +// NEWSTRUCTPATHTBAA-NEXT: [[ENTRY:.*:]] +// NEWSTRUCTPATHTBAA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0 +// NEWSTRUCTPATHTBAA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1 +// NEWSTRUCTPATHTBAA-NEXT: [[CALL:%.*]] = tail call { float, float } @cacoshf([2 x float] noundef alignstack(8) [[DOTFCA_1_INSERT]]) #[[ATTR9]] +// NEWSTRUCTPATHTBAA-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[CALL]], 0 +// NEWSTRUCTPATHTBAA-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] +// NEWSTRUCTPATHTBAA-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP2]] +// NEWSTRUCTPATHTBAA-NEXT: ret float [[ADD]] // float test_cacoshf (float num[]) { float _Complex z = cacoshf(num[2]); // Don't emit TBAA metadata @@ -147,13 +227,13 @@ float test_cacoshf (float num[]) { } //. 
-// NONEWSTRUCTPATHTBAA: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// NONEWSTRUCTPATHTBAA: [[FLOAT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // NONEWSTRUCTPATHTBAA: [[META3]] = !{!"float", [[META4:![0-9]+]], i64 0} // NONEWSTRUCTPATHTBAA: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} // NONEWSTRUCTPATHTBAA: [[META5]] = !{!"Simple C/C++ TBAA"} -// NONEWSTRUCTPATHTBAA: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// NONEWSTRUCTPATHTBAA: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} // NONEWSTRUCTPATHTBAA: [[META7]] = !{!"int", [[META4]], i64 0} -// NONEWSTRUCTPATHTBAA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// NONEWSTRUCTPATHTBAA: [[DOUBLE_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} // NONEWSTRUCTPATHTBAA: [[META9]] = !{!"double", [[META4]], i64 0} //. // NEWSTRUCTPATHTBAA: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0, i64 4} @@ -165,6 +245,3 @@ float test_cacoshf (float num[]) { // NEWSTRUCTPATHTBAA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0, i64 8} // NEWSTRUCTPATHTBAA: [[META9]] = !{[[META4]], i64 8, !"double"} //. -//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -// NEWSTRUCTPATHTBAA: {{.*}} -// NONEWSTRUCTPATHTBAA: {{.*}} diff --git a/clang/test/CodeGen/sanitize-metadata-nosanitize.c b/clang/test/CodeGen/sanitize-metadata-nosanitize.c index eabcbd1409fe2..22ed25bd3b670 100644 --- a/clang/test/CodeGen/sanitize-metadata-nosanitize.c +++ b/clang/test/CodeGen/sanitize-metadata-nosanitize.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6 // RUN: %clang_cc1 -O -fexperimental-sanitize-metadata=covered -fexperimental-sanitize-metadata=atomics -fexperimental-sanitize-metadata=uar -triple x86_64-gnu-linux -x c -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK //. @@ -11,9 +11,9 @@ // CHECK: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered2.module_dtor, ptr @__sanitizer_metadata_covered2.module_dtor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics2.module_dtor, ptr @__sanitizer_metadata_atomics2.module_dtor }] //. 
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) -// CHECK-LABEL: define dso_local void @escape -// CHECK-SAME: (ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections [[META2:![0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @escape( +// CHECK-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !pcsections [[META2:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret void // __attribute__((noinline, not_tail_called)) void escape(const volatile void *p) { @@ -22,14 +22,14 @@ __attribute__((noinline, not_tail_called)) void escape(const volatile void *p) { } // CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) -// CHECK-LABEL: define dso_local i32 @normal_function -// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !pcsections [[META4:![0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local i32 @normal_function( +// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !pcsections [[META4:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6:![0-9]+]] // CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11:![0-9]+]] // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] // CHECK-NEXT: ret i32 [[TMP0]] // int normal_function(int *x, int *y) { @@ -39,14 +39,14 @@ int 
normal_function(int *x, int *y) { } // CHECK: Function Attrs: disable_sanitizer_instrumentation mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) -// CHECK-LABEL: define dso_local i32 @test_disable_sanitize_instrumentation -// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local i32 @test_disable_sanitize_instrumentation( +// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]] // CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4 // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] // CHECK-NEXT: ret i32 [[TMP0]] // __attribute__((disable_sanitizer_instrumentation)) int test_disable_sanitize_instrumentation(int *x, int *y) { @@ -56,14 +56,14 @@ __attribute__((disable_sanitizer_instrumentation)) int test_disable_sanitize_ins } // CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) -// CHECK-LABEL: define dso_local i32 @test_no_sanitize_thread -// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !pcsections [[META14:![0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local i32 @test_no_sanitize_thread( +// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !pcsections 
[[META14:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]] // CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11]] // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] // CHECK-NEXT: ret i32 [[TMP0]] // __attribute__((no_sanitize("thread"))) int test_no_sanitize_thread(int *x, int *y) { @@ -73,14 +73,14 @@ __attribute__((no_sanitize("thread"))) int test_no_sanitize_thread(int *x, int * } // CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) -// CHECK-LABEL: define dso_local i32 @test_no_sanitize_all -// CHECK-SAME: (ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] !pcsections [[META14]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local i32 @test_no_sanitize_all( +// CHECK-SAME: ptr noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] !pcsections [[META14]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[TBAA6]] +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8, !tbaa [[INTPTR_TBAA6]] // CHECK-NEXT: store atomic i32 1, ptr [[X]] monotonic, align 4, !pcsections [[META11]] // CHECK-NEXT: notail call void @escape(ptr noundef nonnull [[X_ADDR]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] // CHECK-NEXT: ret i32 [[TMP0]] // __attribute__((no_sanitize("all"))) int 
test_no_sanitize_all(int *x, int *y) { @@ -101,13 +101,13 @@ __attribute__((no_sanitize("all"))) int test_no_sanitize_all(int *x, int *y) { // CHECK: [[META3]] = !{i64 0} // CHECK: [[META4]] = !{!"sanmd_covered2!C", [[META5:![0-9]+]]} // CHECK: [[META5]] = !{i64 3} -// CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[INTPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} // CHECK: [[META7]] = !{!"p1 int", [[META8:![0-9]+]], i64 0} // CHECK: [[META8]] = !{!"any pointer", [[META9:![0-9]+]], i64 0} // CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0} // CHECK: [[META10]] = !{!"Simple C/C++ TBAA"} // CHECK: [[META11]] = !{!"sanmd_atomics2!C"} -// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} // CHECK: [[META13]] = !{!"int", [[META9]], i64 0} // CHECK: [[META14]] = !{!"sanmd_covered2!C", [[META15:![0-9]+]]} // CHECK: [[META15]] = !{i64 2} diff --git a/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp index a77593f5df738..8969e12f8f797 100644 --- a/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp +++ b/clang/test/CodeGenCXX/attr-likelihood-if-branch-weights.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-- | FileCheck %s extern volatile bool b; @@ -6,22 +6,23 @@ extern volatile int i; extern bool A(); extern bool B(); -// CHECK-LABEL: @_Z1fv( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z1fv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2:![0-9]+]], !range 
[[RNG6:![0-9]+]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 true) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK: if.then: +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2:![0-9]+]], !range [[RNG6:![0-9]+]], !noundef [[META7:![0-9]+]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 true) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK: [[IF_THEN]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Av() // CHECK-NEXT: store i1 [[CALL]], ptr [[RETVAL]], align 1 -// CHECK-NEXT: br label [[RETURN:%.*]] -// CHECK: if.end: +// CHECK-NEXT: br label %[[RETURN:.*]] +// CHECK: [[IF_END]]: // CHECK-NEXT: [[CALL1:%.*]] = call noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: store i1 [[CALL1]], ptr [[RETVAL]], align 1 -// CHECK-NEXT: br label [[RETURN]] -// CHECK: return: +// CHECK-NEXT: br label %[[RETURN]] +// CHECK: [[RETURN]]: // CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 1 // CHECK-NEXT: ret i1 [[TMP1]] // @@ -33,22 +34,23 @@ bool f() { return B(); } -// CHECK-LABEL: @_Z1gv( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z1gv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK: if.then: +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// 
CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK: [[IF_THEN]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Av() // CHECK-NEXT: store i1 [[CALL]], ptr [[RETVAL]], align 1 -// CHECK-NEXT: br label [[RETURN:%.*]] -// CHECK: if.end: +// CHECK-NEXT: br label %[[RETURN:.*]] +// CHECK: [[IF_END]]: // CHECK-NEXT: [[CALL1:%.*]] = call noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: store i1 [[CALL1]], ptr [[RETVAL]], align 1 -// CHECK-NEXT: br label [[RETURN]] -// CHECK: return: +// CHECK-NEXT: br label %[[RETURN]] +// CHECK: [[RETURN]]: // CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 1 // CHECK-NEXT: ret i1 [[TMP1]] // @@ -61,22 +63,23 @@ bool g() { return B(); } -// CHECK-LABEL: @_Z1hv( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z1hv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK: if.then: +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK: [[IF_THEN]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Av() // CHECK-NEXT: store i1 [[CALL]], ptr [[RETVAL]], align 1 -// CHECK-NEXT: br label [[RETURN:%.*]] -// 
CHECK: if.end: +// CHECK-NEXT: br label %[[RETURN:.*]] +// CHECK: [[IF_END]]: // CHECK-NEXT: [[CALL1:%.*]] = call noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: store i1 [[CALL1]], ptr [[RETVAL]], align 1 -// CHECK-NEXT: br label [[RETURN]] -// CHECK: return: +// CHECK-NEXT: br label %[[RETURN]] +// CHECK: [[RETURN]]: // CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 1 // CHECK-NEXT: ret i1 [[TMP1]] // @@ -87,18 +90,19 @@ bool h() { return B(); } -// CHECK-LABEL: @_Z8NullStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -// CHECK: if.then: -// CHECK-NEXT: br label [[IF_END:%.*]] -// CHECK: if.else: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: +// CHECK-LABEL: define dso_local void @_Z8NullStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: br label %[[IF_END:.*]] +// CHECK: [[IF_ELSE]]: +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: // CHECK-NEXT: ret void // void NullStmt() { @@ -110,33 +114,34 @@ void NullStmt() { } } -// CHECK-LABEL: @_Z6IfStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// 
CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END2:%.*]] -// CHECK: if.then: +// CHECK-LABEL: define dso_local void @_Z6IfStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END2:.*]] +// CHECK: [[IF_THEN]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv() -// CHECK-NEXT: br i1 [[CALL]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] -// CHECK: if.then1: -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: -// CHECK-NEXT: br label [[IF_END2]] -// CHECK: if.end2: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK-NEXT: br i1 [[TOBOOL3]], label [[IF_THEN4:%.*]], label [[IF_END8:%.*]] -// CHECK: if.then4: +// CHECK-NEXT: br i1 [[CALL]], label %[[IF_THEN1:.*]], label %[[IF_END:.*]] +// CHECK: [[IF_THEN1]]: +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: +// CHECK-NEXT: br label %[[IF_END2]] +// CHECK: [[IF_END2]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV3:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK-NEXT: br i1 [[LOADEDV3]], label %[[IF_THEN4:.*]], label %[[IF_END8:.*]] +// CHECK: [[IF_THEN4]]: // CHECK-NEXT: [[CALL5:%.*]] = call noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: [[CALL5_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL5]], i1 false) -// CHECK-NEXT: br i1 [[CALL5_EXPVAL]], label 
[[IF_THEN6:%.*]], label [[IF_END7:%.*]] -// CHECK: if.then6: -// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[IF_END7]] -// CHECK: if.end7: -// CHECK-NEXT: br label [[IF_END8]] -// CHECK: if.end8: +// CHECK-NEXT: br i1 [[CALL5_EXPVAL]], label %[[IF_THEN6:.*]], label %[[IF_END7:.*]] +// CHECK: [[IF_THEN6]]: +// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: br label %[[IF_END7]] +// CHECK: [[IF_END7]]: +// CHECK-NEXT: br label %[[IF_END8]] +// CHECK: [[IF_END8]]: // CHECK-NEXT: ret void // void IfStmt() { @@ -149,37 +154,38 @@ void IfStmt() { } } -// CHECK-LABEL: @_Z9WhileStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK: if.then: -// CHECK-NEXT: br label [[WHILE_COND:%.*]] -// CHECK: while.cond: +// CHECK-LABEL: define dso_local void @_Z9WhileStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: br label %[[WHILE_COND:.*]] +// CHECK: [[WHILE_COND]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv() -// CHECK-NEXT: br i1 [[CALL]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]] -// CHECK: while.body: -// CHECK-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP7:![0-9]+]] -// CHECK: while.end: -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: 
-// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END7:%.*]] -// CHECK: if.then2: -// CHECK-NEXT: br label [[WHILE_COND3:%.*]] -// CHECK: while.cond3: +// CHECK-NEXT: br i1 [[CALL]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]] +// CHECK: [[WHILE_BODY]]: +// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK: [[WHILE_END]]: +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]] +// CHECK: [[IF_THEN2]]: +// CHECK-NEXT: br label %[[WHILE_COND3:.*]] +// CHECK: [[WHILE_COND3]]: // CHECK-NEXT: [[CALL4:%.*]] = call noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: [[CALL4_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL4]], i1 false) -// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label [[WHILE_BODY5:%.*]], label [[WHILE_END6:%.*]] -// CHECK: while.body5: -// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[WHILE_COND3]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK: while.end6: -// CHECK-NEXT: br label [[IF_END7]] -// CHECK: if.end7: +// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label %[[WHILE_BODY5:.*]], label %[[WHILE_END6:.*]] +// CHECK: [[WHILE_BODY5]]: +// CHECK-NEXT: store volatile i8 0, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: br label %[[WHILE_COND3]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK: [[WHILE_END6]]: +// CHECK-NEXT: br label %[[IF_END7]] +// CHECK: [[IF_END7]]: // CHECK-NEXT: ret void // void WhileStmt() { @@ -191,35 +197,36 @@ void WhileStmt() { [[unlikely]] { b = false; } } -// CHECK-LABEL: @_Z6DoStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK: if.then: -// CHECK-NEXT: br label [[DO_BODY:%.*]] -// CHECK: do.body: -// CHECK-NEXT: br label [[DO_COND:%.*]] -// CHECK: do.cond: +// CHECK-LABEL: define dso_local void @_Z6DoStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: br label %[[DO_BODY:.*]] +// CHECK: [[DO_BODY]]: +// CHECK-NEXT: br label %[[DO_COND:.*]] +// CHECK: [[DO_COND]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv() -// CHECK-NEXT: br i1 [[CALL]], label [[DO_BODY]], label [[DO_END:%.*]], !llvm.loop [[LOOP11:![0-9]+]] -// CHECK: do.end: -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END7:%.*]] -// CHECK: if.then2: -// CHECK-NEXT: br label [[DO_BODY3:%.*]] -// CHECK: do.body3: -// CHECK-NEXT: br label [[DO_COND4:%.*]] -// CHECK: do.cond4: +// CHECK-NEXT: br i1 [[CALL]], label %[[DO_BODY]], label %[[DO_END:.*]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK: [[DO_END]]: +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], 
!noundef [[META7]] +// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]] +// CHECK: [[IF_THEN2]]: +// CHECK-NEXT: br label %[[DO_BODY3:.*]] +// CHECK: [[DO_BODY3]]: +// CHECK-NEXT: br label %[[DO_COND4:.*]] +// CHECK: [[DO_COND4]]: // CHECK-NEXT: [[CALL5:%.*]] = call noundef zeroext i1 @_Z1Bv() -// CHECK-NEXT: br i1 [[CALL5]], label [[DO_BODY3]], label [[DO_END6:%.*]], !llvm.loop [[LOOP12:![0-9]+]] -// CHECK: do.end6: -// CHECK-NEXT: br label [[IF_END7]] -// CHECK: if.end7: +// CHECK-NEXT: br i1 [[CALL5]], label %[[DO_BODY3]], label %[[DO_END6:.*]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK: [[DO_END6]]: +// CHECK-NEXT: br label %[[IF_END7]] +// CHECK: [[IF_END7]]: // CHECK-NEXT: ret void // void DoStmt() { @@ -234,36 +241,37 @@ void DoStmt() { while (B()); } -// CHECK-LABEL: @_Z7ForStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK: if.then: -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: +// CHECK-LABEL: define dso_local void @_Z7ForStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: // CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z1Bv() -// CHECK-NEXT: br i1 [[CALL]], label 
[[FOR_BODY:%.*]], label [[FOR_END:%.*]] -// CHECK: for.body: -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] -// CHECK: for.end: -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END7:%.*]] -// CHECK: if.then2: -// CHECK-NEXT: br label [[FOR_COND3:%.*]] -// CHECK: for.cond3: +// CHECK-NEXT: br i1 [[CALL]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK: [[FOR_END]]: +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_END7:.*]] +// CHECK: [[IF_THEN2]]: +// CHECK-NEXT: br label %[[FOR_COND3:.*]] +// CHECK: [[FOR_COND3]]: // CHECK-NEXT: [[CALL4:%.*]] = call noundef zeroext i1 @_Z1Bv() // CHECK-NEXT: [[CALL4_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CALL4]], i1 false) -// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label [[FOR_BODY5:%.*]], label [[FOR_END6:%.*]] -// CHECK: for.body5: -// CHECK-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP14:![0-9]+]] -// CHECK: for.end6: -// CHECK-NEXT: br label [[IF_END7]] -// CHECK: if.end7: +// CHECK-NEXT: br i1 [[CALL4_EXPVAL]], label %[[FOR_BODY5:.*]], label %[[FOR_END6:.*]] +// CHECK: [[FOR_BODY5]]: +// CHECK-NEXT: br label %[[FOR_COND3]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK: [[FOR_END6]]: +// CHECK-NEXT: br label %[[IF_END7]] +// CHECK: [[IF_END7]]: // CHECK-NEXT: ret void // void ForStmt() { @@ -275,20 +283,21 @@ void ForStmt() { [[unlikely]] {} } -// CHECK-LABEL: @_Z8GotoStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -// CHECK: if.then: -// CHECK-NEXT: br label [[END:%.*]] -// CHECK: if.else: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[IF_END:%.*]] -// CHECK: if.end: -// CHECK-NEXT: br label [[END]] -// CHECK: end: +// CHECK-LABEL: define dso_local void @_Z8GotoStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: br label %[[END:.*]] +// CHECK: [[IF_ELSE]]: +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: br label %[[IF_END:.*]] +// CHECK: [[IF_END]]: +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]]: // CHECK-NEXT: ret void // void GotoStmt() { @@ -301,18 +310,19 @@ void GotoStmt() { end:; } -// CHECK-LABEL: @_Z10ReturnStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -// CHECK: if.then: -// CHECK-NEXT: br label [[IF_END:%.*]] -// CHECK: if.else: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[IF_END]] -// 
CHECK: if.end: +// CHECK-LABEL: define dso_local void @_Z10ReturnStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: br label %[[IF_END:.*]] +// CHECK: [[IF_ELSE]]: +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: // CHECK-NEXT: ret void // void ReturnStmt() { @@ -324,35 +334,36 @@ void ReturnStmt() { } } -// CHECK-LABEL: @_Z10SwitchStmtv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -// CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA15:![0-9]+]] -// CHECK-NEXT: switch i32 [[TMP1]], label [[SW_EPILOG:%.*]] [ +// CHECK-LABEL: define dso_local void @_Z10SwitchStmtv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: [[LOADEDV_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[LOADEDV]], i1 false) +// CHECK-NEXT: br i1 [[LOADEDV_EXPVAL]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA16:![0-9]+]] +// CHECK-NEXT: switch i32 
[[TMP1]], label %[[SW_EPILOG:.*]] [ // CHECK-NEXT: ] -// CHECK: sw.epilog: -// CHECK-NEXT: br label [[IF_END:%.*]] -// CHECK: if.else: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: -// CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[TBAA2]], !range [[RNG6]] -// CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_ELSE4:%.*]] -// CHECK: if.then2: -// CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA15]] -// CHECK-NEXT: switch i32 [[TMP3]], label [[SW_EPILOG3:%.*]] [ +// CHECK: [[SW_EPILOG]]: +// CHECK-NEXT: br label %[[IF_END:.*]] +// CHECK: [[IF_ELSE]]: +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: +// CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr @b, align 1, !tbaa [[BOOL_TBAA2]], !range [[RNG6]], !noundef [[META7]] +// CHECK-NEXT: [[LOADEDV1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-NEXT: br i1 [[LOADEDV1]], label %[[IF_THEN2:.*]], label %[[IF_ELSE4:.*]] +// CHECK: [[IF_THEN2]]: +// CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA16]] +// CHECK-NEXT: switch i32 [[TMP3]], label %[[SW_EPILOG3:.*]] [ // CHECK-NEXT: ] -// CHECK: sw.epilog3: -// CHECK-NEXT: br label [[IF_END5:%.*]] -// CHECK: if.else4: -// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[IF_END5]] -// CHECK: if.end5: +// CHECK: [[SW_EPILOG3]]: +// CHECK-NEXT: br label %[[IF_END5:.*]] +// CHECK: [[IF_ELSE4]]: +// CHECK-NEXT: store volatile i8 1, ptr @b, align 1, !tbaa [[BOOL_TBAA2]] +// CHECK-NEXT: br label %[[IF_END5]] +// CHECK: [[IF_END5]]: // CHECK-NEXT: ret void // void SwitchStmt() { @@ -371,3 +382,21 @@ void SwitchStmt() { } } +//. 
+// CHECK: [[BOOL_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"bool", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[RNG6]] = !{i8 0, i8 2} +// CHECK: [[META7]] = !{} +// CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]} +// CHECK: [[META9]] = !{!"llvm.loop.mustprogress"} +// CHECK: [[META10]] = !{!"llvm.loop.unroll.disable"} +// CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META9]], [[META10]]} +// CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]} +// CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META9]], [[META10]]} +// CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META9]], [[META10]]} +// CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META9]], [[META10]]} +// CHECK: [[INT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// CHECK: [[META17]] = !{!"int", [[META4]], i64 0} +//. diff --git a/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp b/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp index 151b77ac1007b..441faac6bdd3b 100644 --- a/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp +++ b/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp @@ -1,61 +1,64 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-linux-gnu -verify // RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-linux-gnu | FileCheck %s -// CHECK-LABEL: @_Z2wli( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @_Z2wli( +// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2:![0-9]+]] -// 
CHECK-NEXT: br label [[WHILE_COND:%.*]] -// CHECK: while.cond: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// CHECK-NEXT: br label %[[WHILE_COND:.*]] +// CHECK: [[WHILE_COND]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 true) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]] -// CHECK: while.body: -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]] +// CHECK: [[WHILE_BODY]]: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP6:![0-9]+]] -// CHECK: while.end: +// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK: [[WHILE_END]]: // CHECK-NEXT: ret void // void wl(int e){ while(e) [[likely]] ++e; } -// CHECK-LABEL: @_Z2wui( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @_Z2wui( +// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[WHILE_COND:%.*]] -// CHECK: while.cond: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[WHILE_COND:.*]] +// CHECK: 
[[WHILE_COND]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[TOBOOL]], i1 false) -// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]] -// CHECK: while.body: -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: br i1 [[TOBOOL_EXPVAL]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]] +// CHECK: [[WHILE_BODY]]: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP9:![0-9]+]] -// CHECK: while.end: +// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[WHILE_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK: [[WHILE_END]]: // CHECK-NEXT: ret void // void wu(int e){ while(e) [[unlikely]] ++e; } -// CHECK-LABEL: @_Z15w_branch_elidedj( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @_Z15w_branch_elidedj( +// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[WHILE_BODY:%.*]] -// CHECK: while.body: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[WHILE_BODY:.*]] +// CHECK: [[WHILE_BODY]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add i32 [[TMP0]], 1 -// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label 
[[WHILE_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[WHILE_BODY]], !llvm.loop [[LOOP10:![0-9]+]] // void w_branch_elided(unsigned e){ // expected-warning@+2 {{attribute 'likely' has no effect when annotating an infinite loop}} @@ -63,31 +66,32 @@ void w_branch_elided(unsigned e){ while(1) [[likely]] ++e; } -// CHECK-LABEL: @_Z2flj( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @_Z2flj( +// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3:[0-9]+]] -// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0]], [[TMP1]] // CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 true) -// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -// CHECK: for.cond.cleanup: +// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +// CHECK: [[FOR_COND_CLEANUP]]: // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: br label 
[[FOR_END:%.*]] -// CHECK: for.body: -// CHECK-NEXT: br label [[FOR_INC:%.*]] -// CHECK: for.inc: -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: br label %[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: +// CHECK-NEXT: br label %[[FOR_INC:.*]] +// CHECK: [[FOR_INC]]: +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] -// CHECK: for.end: +// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK: [[FOR_END]]: // CHECK-NEXT: ret void // void fl(unsigned e) @@ -95,31 +99,32 @@ void fl(unsigned e) for(int i = 0; i != e; ++e) [[likely]]; } -// CHECK-LABEL: @_Z2fui( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @_Z2fui( +// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 [[E:%.*]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 0, ptr [[I]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa 
[[INT_TBAA2]] // CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0]], [[TMP1]] // CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 false) -// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -// CHECK: for.cond.cleanup: +// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +// CHECK: [[FOR_COND_CLEANUP]]: // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: br label [[FOR_END:%.*]] -// CHECK: for.body: -// CHECK-NEXT: br label [[FOR_INC:%.*]] -// CHECK: for.inc: -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: br label %[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: +// CHECK-NEXT: br label %[[FOR_INC:.*]] +// CHECK: [[FOR_INC]]: +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] -// CHECK: for.end: +// CHECK-NEXT: store i32 [[INC]], ptr [[E_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK: [[FOR_END]]: // CHECK-NEXT: ret void // void fu(int e) @@ -127,62 +132,64 @@ void fu(int e) for(int i = 0; i != e; ++e) [[unlikely]]; } -// CHECK-LABEL: @_Z15f_branch_elidedv( -// CHECK-NEXT: entry: -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-LABEL: define dso_local void @_Z15f_branch_elidedv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // void f_branch_elided() { for(;;) [[likely]]; } -// CHECK-LABEL: @_Z3frlOA4_i( -// CHECK-NEXT: entry: +// CHECK-LABEL: define 
dso_local void @_Z3frlOA4_i( +// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__BEGIN1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__END1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[E:%.*]], ptr [[E_ADDR]], align 8, !tbaa [[TBAA14:![0-9]+]] +// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14:![0-9]+]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__RANGE1]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17:![0-9]+]], !align [[META18:![0-9]+]] +// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__BEGIN1]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]] // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__END1]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]] // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr 
[[TMP2]], i64 0, i64 0 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4 -// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]] // CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 true) -// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -// CHECK: for.cond.cleanup: +// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +// CHECK: [[FOR_COND_CLEANUP]]: // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__END1]]) #[[ATTR3]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__BEGIN1]]) #[[ATTR3]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__RANGE1]]) #[[ATTR3]] -// CHECK-NEXT: br label [[FOR_END:%.*]] -// CHECK: for.body: +// CHECK-NEXT: br label %[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[INT_TBAA2]] +// 
CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: br label [[FOR_INC:%.*]] -// CHECK: for.inc: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: br label %[[FOR_INC:.*]] +// CHECK: [[FOR_INC]]: +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1 -// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] -// CHECK: for.end: +// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK: [[FOR_END]]: // CHECK-NEXT: ret void // void frl(int (&&e) [4]) @@ -190,54 +197,76 @@ void frl(int (&&e) [4]) for(int i : e) [[likely]]; } -// CHECK-LABEL: @_Z3fruOA4_i( -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @_Z3fruOA4_i( +// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__BEGIN1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[__END1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[E:%.*]], ptr [[E_ADDR]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__RANGE1]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8, !tbaa [[INTPTR_TBAA14]], 
!nonnull [[META17]], !align [[META18]] +// CHECK-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__BEGIN1]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]] // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[__END1]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[INTPTR_TBAA14]], !nonnull [[META17]], !align [[META18]] // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4 -// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]] // CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 false) -// CHECK-NEXT: br i1 
[[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -// CHECK: for.cond.cleanup: +// CHECK-NEXT: br i1 [[CMP_EXPVAL]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +// CHECK: [[FOR_COND_CLEANUP]]: // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__END1]]) #[[ATTR3]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__BEGIN1]]) #[[ATTR3]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[__RANGE1]]) #[[ATTR3]] -// CHECK-NEXT: br label [[FOR_END:%.*]] -// CHECK: for.body: +// CHECK-NEXT: br label %[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: br label [[FOR_INC:%.*]] -// CHECK: for.inc: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: br label %[[FOR_INC:.*]] +// CHECK: [[FOR_INC]]: +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] // CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1 -// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] -// CHECK: for.end: +// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[INTPTR_TBAA14]] +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK: [[FOR_END]]: // CHECK-NEXT: ret void // void 
fru(int (&&e) [4]) { for(int i : e) [[unlikely]]; } +//. +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META7:![0-9]+]], [[META8:![0-9]+]]} +// CHECK: [[META7]] = !{!"llvm.loop.mustprogress"} +// CHECK: [[META8]] = !{!"llvm.loop.unroll.disable"} +// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META8]]} +// CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META8]]} +// CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META8]]} +// CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META7]], [[META8]]} +// CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META7]], [[META8]]} +// CHECK: [[INTPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// CHECK: [[META15]] = !{!"p1 int", [[META16:![0-9]+]], i64 0} +// CHECK: [[META16]] = !{!"any pointer", [[META4]], i64 0} +// CHECK: [[META17]] = !{} +// CHECK: [[META18]] = !{i64 4} +// CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META8]]} +// CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]]} +//. 
diff --git a/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp b/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp index 328d1bcc76208..bb6f5bb248e3e 100644 --- a/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp +++ b/clang/test/CodeGenCXX/attr-likelihood-switch-branch-weights.cpp @@ -1,15 +1,16 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O1 -disable-llvm-passes -emit-llvm %s -o - -triple=x86_64-linux-gnu | FileCheck %s extern volatile int i; -// CHECK-LABEL: @_Z8OneCaseLv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG]] -// CHECK-NEXT: ], !prof !6 -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z8OneCaseLv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG]] +// CHECK-NEXT: ], !prof [[PROF6:![0-9]+]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void OneCaseL() { @@ -18,18 +19,19 @@ void OneCaseL() { } } -// CHECK-LABEL: @_Z8OneCaseUv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_BB:%.*]] -// CHECK-NEXT: ], !prof !7 -// CHECK: sw.bb: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @_Z8OneCaseUv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa 
[[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_BB:.*]] +// CHECK-NEXT: ], !prof [[PROF7:![0-9]+]] +// CHECK: [[SW_BB]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void OneCaseU() { @@ -38,14 +40,15 @@ void OneCaseU() { } } -// CHECK-LABEL: @_Z10TwoCasesLNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG]] -// CHECK-NEXT: i32 2, label [[SW_EPILOG]] -// CHECK-NEXT: ], !prof !8 -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z10TwoCasesLNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG]] +// CHECK-NEXT: i32 2, label %[[SW_EPILOG]] +// CHECK-NEXT: ], !prof [[PROF8:![0-9]+]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void TwoCasesLN() { @@ -55,14 +58,15 @@ void TwoCasesLN() { } } -// CHECK-LABEL: @_Z10TwoCasesUNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG]] -// CHECK-NEXT: i32 2, label [[SW_EPILOG]] -// CHECK-NEXT: ], !prof !9 -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z10TwoCasesUNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG]] +// CHECK-NEXT: i32 2, label %[[SW_EPILOG]] +// CHECK-NEXT: ], !prof [[PROF9:![0-9]+]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void TwoCasesUN() { @@ -72,14 +76,15 @@ void TwoCasesUN() { } } -// CHECK-LABEL: @_Z10TwoCasesLUv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG]] -// CHECK-NEXT: i32 2, label [[SW_EPILOG]] -// CHECK-NEXT: ], !prof !10 -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z10TwoCasesLUv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG]] +// CHECK-NEXT: i32 2, label %[[SW_EPILOG]] +// CHECK-NEXT: ], !prof [[PROF10:![0-9]+]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void TwoCasesLU() { @@ -89,20 +94,21 @@ void TwoCasesLU() { } } -// CHECK-LABEL: @_Z20CasesFallthroughNNLNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_BB:%.*]] -// CHECK-NEXT: i32 2, label [[SW_BB]] -// CHECK-NEXT: i32 3, label [[SW_BB1:%.*]] -// CHECK-NEXT: i32 4, label [[SW_BB1]] -// CHECK-NEXT: ], !prof !11 -// CHECK: sw.bb: -// CHECK-NEXT: br label [[SW_BB1]] -// CHECK: sw.bb1: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z20CasesFallthroughNNLNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, 
align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_BB:.*]] +// CHECK-NEXT: i32 2, label %[[SW_BB]] +// CHECK-NEXT: i32 3, label %[[SW_BB1:.*]] +// CHECK-NEXT: i32 4, label %[[SW_BB1]] +// CHECK-NEXT: ], !prof [[PROF11:![0-9]+]] +// CHECK: [[SW_BB]]: +// CHECK-NEXT: br label %[[SW_BB1]] +// CHECK: [[SW_BB1]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void CasesFallthroughNNLN() { @@ -114,20 +120,21 @@ void CasesFallthroughNNLN() { } } -// CHECK-LABEL: @_Z20CasesFallthroughNNUNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_BB:%.*]] -// CHECK-NEXT: i32 2, label [[SW_BB]] -// CHECK-NEXT: i32 3, label [[SW_BB1:%.*]] -// CHECK-NEXT: i32 4, label [[SW_BB1]] -// CHECK-NEXT: ], !prof !12 -// CHECK: sw.bb: -// CHECK-NEXT: br label [[SW_BB1]] -// CHECK: sw.bb1: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z20CasesFallthroughNNUNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_BB:.*]] +// CHECK-NEXT: i32 2, label %[[SW_BB]] +// CHECK-NEXT: i32 3, label %[[SW_BB1:.*]] +// CHECK-NEXT: i32 4, label %[[SW_BB1]] +// CHECK-NEXT: ], !prof [[PROF12:![0-9]+]] +// CHECK: [[SW_BB]]: +// CHECK-NEXT: br label %[[SW_BB1]] +// CHECK: [[SW_BB1]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void CasesFallthroughNNUN() { @@ -139,29 +146,30 @@ void CasesFallthroughNNUN() { } } -// CHECK-LABEL: @_Z28CasesFallthroughRangeSmallLNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 
4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_BB:%.*]] -// CHECK-NEXT: i32 2, label [[SW_BB]] -// CHECK-NEXT: i32 3, label [[SW_BB]] -// CHECK-NEXT: i32 4, label [[SW_BB]] -// CHECK-NEXT: i32 5, label [[SW_BB]] -// CHECK-NEXT: i32 102, label [[SW_BB1:%.*]] -// CHECK-NEXT: i32 103, label [[SW_BB2:%.*]] -// CHECK-NEXT: i32 104, label [[SW_BB2]] -// CHECK-NEXT: ], !prof !13 -// CHECK: sw.bb: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @_Z28CasesFallthroughRangeSmallLNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_BB:.*]] +// CHECK-NEXT: i32 2, label %[[SW_BB]] +// CHECK-NEXT: i32 3, label %[[SW_BB]] +// CHECK-NEXT: i32 4, label %[[SW_BB]] +// CHECK-NEXT: i32 5, label %[[SW_BB]] +// CHECK-NEXT: i32 102, label %[[SW_BB1:.*]] +// CHECK-NEXT: i32 103, label %[[SW_BB2:.*]] +// CHECK-NEXT: i32 104, label %[[SW_BB2]] +// CHECK-NEXT: ], !prof [[PROF13:![0-9]+]] +// CHECK: [[SW_BB]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[SW_BB1]] -// CHECK: sw.bb1: -// CHECK-NEXT: br label [[SW_BB2]] -// CHECK: sw.bb2: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[SW_BB1]] +// CHECK: [[SW_BB1]]: +// CHECK-NEXT: br label %[[SW_BB2]] +// CHECK: [[SW_BB2]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void CasesFallthroughRangeSmallLN() { @@ -173,29 +181,30 @@ void 
CasesFallthroughRangeSmallLN() { } } -// CHECK-LABEL: @_Z28CasesFallthroughRangeSmallUNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_EPILOG:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_BB:%.*]] -// CHECK-NEXT: i32 2, label [[SW_BB]] -// CHECK-NEXT: i32 3, label [[SW_BB]] -// CHECK-NEXT: i32 4, label [[SW_BB]] -// CHECK-NEXT: i32 5, label [[SW_BB]] -// CHECK-NEXT: i32 102, label [[SW_BB1:%.*]] -// CHECK-NEXT: i32 103, label [[SW_BB2:%.*]] -// CHECK-NEXT: i32 104, label [[SW_BB2]] -// CHECK-NEXT: ], !prof !14 -// CHECK: sw.bb: -// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] +// CHECK-LABEL: define dso_local void @_Z28CasesFallthroughRangeSmallUNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_EPILOG:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_BB:.*]] +// CHECK-NEXT: i32 2, label %[[SW_BB]] +// CHECK-NEXT: i32 3, label %[[SW_BB]] +// CHECK-NEXT: i32 4, label %[[SW_BB]] +// CHECK-NEXT: i32 5, label %[[SW_BB]] +// CHECK-NEXT: i32 102, label %[[SW_BB1:.*]] +// CHECK-NEXT: i32 103, label %[[SW_BB2:.*]] +// CHECK-NEXT: i32 104, label %[[SW_BB2]] +// CHECK-NEXT: ], !prof [[PROF14:![0-9]+]] +// CHECK: [[SW_BB]]: +// CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: br label [[SW_BB1]] -// CHECK: sw.bb1: -// CHECK-NEXT: br label [[SW_BB2]] -// CHECK: sw.bb2: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-NEXT: store volatile i32 [[INC]], ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: br label %[[SW_BB1]] +// CHECK: [[SW_BB1]]: +// CHECK-NEXT: br label %[[SW_BB2]] +// CHECK: 
[[SW_BB2]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void CasesFallthroughRangeSmallUN() { @@ -207,23 +216,24 @@ void CasesFallthroughRangeSmallUN() { } } -// CHECK-LABEL: @_Z29CasesFallthroughRangeLargeLLNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_CASERANGE:%.*]] [ -// CHECK-NEXT: i32 1003, label [[SW_BB1:%.*]] -// CHECK-NEXT: i32 104, label [[SW_BB1]] -// CHECK-NEXT: ], !prof !8 -// CHECK: sw.bb: -// CHECK-NEXT: br label [[SW_BB1]] -// CHECK: sw.bb1: -// CHECK-NEXT: br label [[SW_EPILOG:%.*]] -// CHECK: sw.caserange: +// CHECK-LABEL: define dso_local void @_Z29CasesFallthroughRangeLargeLLNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_CASERANGE:.*]] [ +// CHECK-NEXT: i32 1003, label %[[SW_BB1:.*]] +// CHECK-NEXT: i32 104, label %[[SW_BB1]] +// CHECK-NEXT: ], !prof [[PROF8]] +// CHECK: [[SW_BB:.*]]: +// CHECK-NEXT: br label %[[SW_BB1]] +// CHECK: [[SW_BB1]]: +// CHECK-NEXT: br label %[[SW_EPILOG:.*]] +// CHECK: [[SW_CASERANGE]]: // CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 0 // CHECK-NEXT: [[INBOUNDS:%.*]] = icmp ule i32 [[TMP1]], 64 // CHECK-NEXT: [[INBOUNDS_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[INBOUNDS]], i1 true) -// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label [[SW_BB:%.*]], label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label %[[SW_BB]], label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void CasesFallthroughRangeLargeLLN() { @@ -234,23 +244,24 @@ void CasesFallthroughRangeLargeLLN() { } } -// CHECK-LABEL: @_Z29CasesFallthroughRangeLargeUUNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 
[[TMP0]], label [[SW_CASERANGE:%.*]] [ -// CHECK-NEXT: i32 1003, label [[SW_BB1:%.*]] -// CHECK-NEXT: i32 104, label [[SW_BB1]] -// CHECK-NEXT: ], !prof !9 -// CHECK: sw.bb: -// CHECK-NEXT: br label [[SW_BB1]] -// CHECK: sw.bb1: -// CHECK-NEXT: br label [[SW_EPILOG:%.*]] -// CHECK: sw.caserange: +// CHECK-LABEL: define dso_local void @_Z29CasesFallthroughRangeLargeUUNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_CASERANGE:.*]] [ +// CHECK-NEXT: i32 1003, label %[[SW_BB1:.*]] +// CHECK-NEXT: i32 104, label %[[SW_BB1]] +// CHECK-NEXT: ], !prof [[PROF9]] +// CHECK: [[SW_BB:.*]]: +// CHECK-NEXT: br label %[[SW_BB1]] +// CHECK: [[SW_BB1]]: +// CHECK-NEXT: br label %[[SW_EPILOG:.*]] +// CHECK: [[SW_CASERANGE]]: // CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 0 // CHECK-NEXT: [[INBOUNDS:%.*]] = icmp ule i32 [[TMP1]], 64 // CHECK-NEXT: [[INBOUNDS_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[INBOUNDS]], i1 false) -// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label [[SW_BB:%.*]], label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-NEXT: br i1 [[INBOUNDS_EXPVAL]], label %[[SW_BB]], label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void CasesFallthroughRangeLargeUUN() { @@ -261,15 +272,16 @@ void CasesFallthroughRangeLargeUUN() { } } -// CHECK-LABEL: @_Z15OneCaseDefaultLv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]] -// CHECK-NEXT: ], !prof !15 -// CHECK: sw.default: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z15OneCaseDefaultLv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// 
CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]] +// CHECK-NEXT: ], !prof [[PROF15:![0-9]+]] +// CHECK: [[SW_DEFAULT]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void OneCaseDefaultL() { @@ -279,15 +291,16 @@ void OneCaseDefaultL() { } } -// CHECK-LABEL: @_Z15OneCaseDefaultUv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]] -// CHECK-NEXT: ], !prof !16 -// CHECK: sw.default: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z15OneCaseDefaultUv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]] +// CHECK-NEXT: ], !prof [[PROF16:![0-9]+]] +// CHECK: [[SW_DEFAULT]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void OneCaseDefaultU() { @@ -297,16 +310,17 @@ void OneCaseDefaultU() { } } -// CHECK-LABEL: @_Z18TwoCasesDefaultLNLv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]] -// CHECK-NEXT: i32 2, label [[SW_EPILOG]] -// CHECK-NEXT: ], !prof !17 -// CHECK: sw.default: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z18TwoCasesDefaultLNLv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// CHECK-NEXT: i32 1, label 
%[[SW_EPILOG:.*]] +// CHECK-NEXT: i32 2, label %[[SW_EPILOG]] +// CHECK-NEXT: ], !prof [[PROF17:![0-9]+]] +// CHECK: [[SW_DEFAULT]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void TwoCasesDefaultLNL() { @@ -317,16 +331,17 @@ void TwoCasesDefaultLNL() { } } -// CHECK-LABEL: @_Z18TwoCasesDefaultLNNv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]] -// CHECK-NEXT: i32 2, label [[SW_EPILOG]] -// CHECK-NEXT: ], !prof !8 -// CHECK: sw.default: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z18TwoCasesDefaultLNNv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]] +// CHECK-NEXT: i32 2, label %[[SW_EPILOG]] +// CHECK-NEXT: ], !prof [[PROF8]] +// CHECK: [[SW_DEFAULT]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void TwoCasesDefaultLNN() { @@ -337,16 +352,17 @@ void TwoCasesDefaultLNN() { } } -// CHECK-LABEL: @_Z18TwoCasesDefaultLNUv( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ -// CHECK-NEXT: i32 1, label [[SW_EPILOG:%.*]] -// CHECK-NEXT: i32 2, label [[SW_EPILOG]] -// CHECK-NEXT: ], !prof !18 -// CHECK: sw.default: -// CHECK-NEXT: br label [[SW_EPILOG]] -// CHECK: sw.epilog: +// CHECK-LABEL: define dso_local void @_Z18TwoCasesDefaultLNUv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @i, align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: switch i32 [[TMP0]], label 
%[[SW_DEFAULT:.*]] [ +// CHECK-NEXT: i32 1, label %[[SW_EPILOG:.*]] +// CHECK-NEXT: i32 2, label %[[SW_EPILOG]] +// CHECK-NEXT: ], !prof [[PROF18:![0-9]+]] +// CHECK: [[SW_DEFAULT]]: +// CHECK-NEXT: br label %[[SW_EPILOG]] +// CHECK: [[SW_EPILOG]]: // CHECK-NEXT: ret void // void TwoCasesDefaultLNU() { @@ -356,3 +372,22 @@ void TwoCasesDefaultLNU() { [[unlikely]] default: break; } } +//. +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[PROF6]] = !{!"branch_weights", i32 357913942, i32 715827883} +// CHECK: [[PROF7]] = !{!"branch_weights", i32 536870912, i32 1} +// CHECK: [[PROF8]] = !{!"branch_weights", i32 238609295, i32 715827883, i32 238609295} +// CHECK: [[PROF9]] = !{!"branch_weights", i32 357913942, i32 1, i32 357913942} +// CHECK: [[PROF10]] = !{!"branch_weights", i32 357913942, i32 715827883, i32 1} +// CHECK: [[PROF11]] = !{!"branch_weights", i32 143165577, i32 143165577, i32 143165577, i32 715827883, i32 143165577} +// CHECK: [[PROF12]] = !{!"branch_weights", i32 214748365, i32 214748365, i32 214748365, i32 1, i32 214748365} +// CHECK: [[PROF13]] = !{!"branch_weights", i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 79536432, i32 715827883, i32 79536432} +// CHECK: [[PROF14]] = !{!"branch_weights", i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 119304648, i32 1, i32 119304648} +// CHECK: [[PROF15]] = !{!"branch_weights", i32 715827883, i32 357913942} +// CHECK: [[PROF16]] = !{!"branch_weights", i32 1, i32 536870912} +// CHECK: [[PROF17]] = !{!"branch_weights", i32 536870912, i32 536870912, i32 268435456} +// CHECK: [[PROF18]] = !{!"branch_weights", i32 1, i32 715827883, i32 357913942} +//. 
diff --git a/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp b/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp index c1ee5af7254a0..d4b4f3030d117 100644 --- a/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp +++ b/clang/test/CodeGenCXX/cfi-mfcall-nomerge.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6 // with MERGE/NO-MERGE assertions added manually. // N.B. although the clang driver defaults to merge, clang_cc1 defaults to non-merge. @@ -29,7 +29,7 @@ void f(S *s, void (S::*p)()) { // NO-MERGE-NEXT: [[MEMPTR_ISVIRTUAL_NOT:%.*]] = icmp eq i64 [[TMP1]], 0 // NO-MERGE-NEXT: br i1 [[MEMPTR_ISVIRTUAL_NOT]], label %[[MEMPTR_NONVIRTUAL:.*]], label %[[MEMPTR_VIRTUAL:.*]] // NO-MERGE: [[MEMPTR_VIRTUAL]]: -// NO-MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2:![0-9]+]] +// NO-MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2:![0-9]+]] // NO-MERGE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 [[P_COERCE0]] // NO-MERGE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -1 // NO-MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5:![0-9]+]] @@ -49,7 +49,7 @@ void f(S *s, void (S::*p)()) { // NO-MERGE: [[MEMPTR_VIRTUAL7]]: // NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META5]] // NO-MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5:[0-9]+]] -// NO-MERGE-NEXT: [[VTABLE8:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]] +// NO-MERGE-NEXT: [[VTABLE8:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] // NO-MERGE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[VTABLE8]], i64 
[[P_COERCE0]] // NO-MERGE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -1 // NO-MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]] @@ -60,7 +60,7 @@ void f(S *s, void (S::*p)()) { // NO-MERGE: [[MEMPTR_VIRTUAL19]]: // NO-MERGE-NEXT: [[MEMPTR_VIRTUALFN9:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META5]] // NO-MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN9]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR5]] -// NO-MERGE-NEXT: [[VTABLE20:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]] +// NO-MERGE-NEXT: [[VTABLE20:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] // NO-MERGE-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[VTABLE20]], i64 [[P_COERCE0]] // NO-MERGE-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 -1 // NO-MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]] @@ -84,7 +84,7 @@ void f(S *s, void (S::*p)()) { // MERGE-NEXT: [[MEMPTR_ISVIRTUAL_NOT:%.*]] = icmp eq i64 [[TMP1]], 0 // MERGE-NEXT: br i1 [[MEMPTR_ISVIRTUAL_NOT]], label %[[MEMPTR_NONVIRTUAL:.*]], label %[[MEMPTR_VIRTUAL:.*]] // MERGE: [[MEMPTR_VIRTUAL]]: -// MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2:![0-9]+]] +// MERGE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2:![0-9]+]] // MERGE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VTABLE]], i64 [[P_COERCE0]] // MERGE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -1 // MERGE-NEXT: [[TMP4:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP3]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5:![0-9]+]] @@ -101,7 +101,7 @@ void f(S *s, void (S::*p)()) { // MERGE: [[MEMPTR_VIRTUAL6]]: // MERGE-NEXT: [[MEMPTR_VIRTUALFN:%.*]] = load ptr, ptr [[TMP3]], align 8, !nosanitize [[META5]] // MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN]](ptr noundef 
nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4:[0-9]+]] -// MERGE-NEXT: [[VTABLE7:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]] +// MERGE-NEXT: [[VTABLE7:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] // MERGE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[VTABLE7]], i64 [[P_COERCE0]] // MERGE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -1 // MERGE-NEXT: [[TMP10:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP9]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]] @@ -109,7 +109,7 @@ void f(S *s, void (S::*p)()) { // MERGE: [[MEMPTR_VIRTUAL17]]: // MERGE-NEXT: [[MEMPTR_VIRTUALFN8:%.*]] = load ptr, ptr [[TMP9]], align 8, !nosanitize [[META5]] // MERGE-NEXT: tail call void [[MEMPTR_VIRTUALFN8]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4]] -// MERGE-NEXT: [[VTABLE18:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA2]] +// MERGE-NEXT: [[VTABLE18:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[VTABLE_POINTER_TBAA2]] // MERGE-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[VTABLE18]], i64 [[P_COERCE0]] // MERGE-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 -1 // MERGE-NEXT: [[TMP13:%.*]] = tail call i1 @llvm.type.test(ptr [[TMP12]], metadata !"_ZTSM1SFvvE.virtual"), !nosanitize [[META5]] @@ -123,9 +123,17 @@ void f(S *s, void (S::*p)()) { // MERGE-NEXT: [[TMP14:%.*]] = phi ptr [ [[MEMPTR_VIRTUALFN19]], %[[MEMPTR_VIRTUAL17]] ], [ [[MEMPTR_NONVIRTUALFN]], %[[MEMPTR_NONVIRTUAL21]] ] // MERGE-NEXT: tail call void [[TMP14]](ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) #[[ATTR4]] // MERGE-NEXT: ret void - -// MERGE: [[ATTR3]] = { noreturn nounwind } -// MERGE: [[ATTR4]] = { nounwind } - -// NO-MERGE: [[ATTR4]] = { nomerge noreturn nounwind } -// NO-MERGE: [[ATTR5]] = { nounwind } +// +//. 
+// NO-MERGE: [[VTABLE_POINTER_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// NO-MERGE: [[META3]] = !{!"vtable pointer", [[META4:![0-9]+]], i64 0} +// NO-MERGE: [[META4]] = !{!"Simple C++ TBAA"} +// NO-MERGE: [[META5]] = !{} +// NO-MERGE: [[PROF6]] = !{!"branch_weights", i32 1048575, i32 1} +//. +// MERGE: [[VTABLE_POINTER_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// MERGE: [[META3]] = !{!"vtable pointer", [[META4:![0-9]+]], i64 0} +// MERGE: [[META4]] = !{!"Simple C++ TBAA"} +// MERGE: [[META5]] = !{} +// MERGE: [[PROF6]] = !{!"branch_weights", i32 1048575, i32 1} +//. diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp index 5f83545f78127..ab3695a3d9ce3 100644 --- a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp +++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: webassembly-registered-target // Simple calls to known variadic functions that are completely elided when @@ -33,32 +33,32 @@ template static Y second(...) 
{ extern "C" { -// CHECK-LABEL: define {{[^@]+}}@first_pair_i32 -// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @first_pair_i32( +// CHECK-SAME: i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[X]] // int first_pair_i32(int x, int y) { return first(x, y); } -// CHECK-LABEL: define {{[^@]+}}@second_pair_i32 -// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @second_pair_i32( +// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[Y]] // int second_pair_i32(int x, int y) { return second(x, y); } -// CHECK-LABEL: define {{[^@]+}}@first_pair_f64 -// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef double @first_pair_f64( +// CHECK-SAME: double noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret double [[X]] // double first_pair_f64(double x, double y) { return first(x, y); } -// CHECK-LABEL: define {{[^@]+}}@second_pair_f64 -// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef double @second_pair_f64( +// CHECK-SAME: double noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret double [[Y]] // double second_pair_f64(double x, double y) { @@ -68,30 +68,30 @@ double second_pair_f64(double x, double y) { extern "C" { -// CHECK-LABEL: define {{[^@]+}}@first_i32_f64 
-// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @first_i32_f64( +// CHECK-SAME: i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[X]] // int first_i32_f64(int x, double y) { return first(x, y); } -// CHECK-LABEL: define {{[^@]+}}@second_i32_f64 -// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef double @second_i32_f64( +// CHECK-SAME: i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret double [[Y]] // double second_i32_f64(int x, double y) { return second(x, y); } -// CHECK-LABEL: define {{[^@]+}}@first_f64_i32 -// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef double @first_f64_i32( +// CHECK-SAME: double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret double [[X]] // double first_f64_i32(double x, int y) { return first(x, y); } -// CHECK-LABEL: define {{[^@]+}}@second_f64_i32 -// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @second_f64_i32( +// CHECK-SAME: double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[Y]] // int second_f64_i32(double x, int y) { return second(x, y); } @@ -100,38 +100,38 @@ int second_f64_i32(double x, int y) { return second(x, y); } extern "C" { typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16))); -// CHECK-LABEL: 
define {{[^@]+}}@first_i32_ulong2 -// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @first_i32_ulong2( +// CHECK-SAME: i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[X]] // int first_i32_ulong2(int x, ulong2 *y) { return first(x, *y); } -// CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2 -// CHECK-SAME: (i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define void @second_i32_ulong2( +// CHECK-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[INT_TBAA2:![0-9]+]] +// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[INT_TBAA2]] // CHECK-NEXT: ret void // void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) { *r = second(x, *y); } -// CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32 -// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]] -// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]] +// CHECK-LABEL: define void @first_ulong2_i32( +// CHECK-SAME: ptr noundef readonly 
captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[INT_TBAA2]] // CHECK-NEXT: ret void // void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) { *r = first(*x, y); } -// CHECK-LABEL: define {{[^@]+}}@second_ulong2_i32 -// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @second_ulong2_i32( +// CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[Y]] // int second_ulong2_i32(ulong2 *x, int y) { return second(*x, y); } @@ -149,33 +149,38 @@ typedef struct { extern "C" { -// CHECK-LABEL: define {{[^@]+}}@first_i32_asc -// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @first_i32_asc( +// CHECK-SAME: i32 noundef returned [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[X]] // int first_i32_asc(int x, asc *y) { return first(x, *y); } -// CHECK-LABEL: define {{[^@]+}}@second_i32_asc -// CHECK-SAME: (i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define void @second_i32_asc( +// CHECK-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr 
#[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false) // CHECK-NEXT: ret void // void second_i32_asc(int x, asc *y, asc *r) { *r = second(x, *y); } -// CHECK-LABEL: define {{[^@]+}}@first_asc_i32 -// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define void @first_asc_i32( +// CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false) // CHECK-NEXT: ret void // void first_asc_i32(asc *x, int y, asc *r) { *r = first(*x, y); } -// CHECK-LABEL: define {{[^@]+}}@second_asc_i32 -// CHECK-SAME: (ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define noundef i32 @second_asc_i32( +// CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]], i32 noundef returned [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i32 [[Y]] // int second_asc_i32(asc *x, int y) { return second(*x, y); } } +//. +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C++ TBAA"} +//. 
diff --git a/clang/test/CodeGenCXX/load-reference-metadata.cpp b/clang/test/CodeGenCXX/load-reference-metadata.cpp index daceb752a732b..abfdd055c3ad6 100644 --- a/clang/test/CodeGenCXX/load-reference-metadata.cpp +++ b/clang/test/CodeGenCXX/load-reference-metadata.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -std=c++11 -O1 -disable-llvm-passes %s -o - | FileCheck %s struct alignas(32) F { int x; }; @@ -13,20 +13,20 @@ struct S { // CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA2]], !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !tbaa [[TBAA9:![0-9]+]], !nonnull [[META7]] -// CHECK-NEXT: store i8 0, ptr [[TMP1]], align 1, !tbaa [[TBAA14:![0-9]+]] -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA2]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !tbaa [[CHARPTR_TBAA9:![0-9]+]], !nonnull [[META7]] +// CHECK-NEXT: store i8 0, ptr [[TMP1]], align 1, !tbaa [[CHAR_TBAA14:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull 
[[META7]], !align [[META8]] // CHECK-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP2]], i32 0, i32 1 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B]], align 8, !tbaa [[TBAA15:![0-9]+]], !nonnull [[META7]], !align [[META16:![0-9]+]] -// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4, !tbaa [[TBAA17:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA2]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B]], align 8, !tbaa [[INTPTR_TBAA15:![0-9]+]], !nonnull [[META7]], !align [[META16:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4, !tbaa [[INT_TBAA17:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1SPTR_TBAA2]], !nonnull [[META7]], !align [[META8]] // CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i32 0, i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8, !tbaa [[TBAA19:![0-9]+]], !nonnull [[META7]], !align [[META20:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8, !tbaa [[_ZTS1FPTR_TBAA19:![0-9]+]], !nonnull [[META7]], !align [[META20:![0-9]+]] // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_F:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK-NEXT: store i32 0, ptr [[X]], align 32, !tbaa [[TBAA21:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[X]], align 32, !tbaa [[INT_TBAA21:![0-9]+]] // CHECK-NEXT: ret void // void test(S &s) { @@ -42,13 +42,13 @@ extern B (&bb)[2]; // CHECK-LABEL: define dso_local void @_Z13test_externalv( // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @b, align 8, !tbaa [[TBAA23:![0-9]+]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @b, align 8, !tbaa [[_ZTS1BPTR_TBAA23:![0-9]+]], !nonnull [[META7]], !align [[META8]] // CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_B:%.*]], ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: 
store i8 0, ptr [[C]], align 8, !tbaa [[TBAA25:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @bb, align 8, !tbaa [[TBAA23]], !nonnull [[META7]], !align [[META20]] +// CHECK-NEXT: store i8 0, ptr [[C]], align 8, !tbaa [[CHAR_TBAA25:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @bb, align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META7]], !align [[META20]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x %struct.B], ptr [[TMP1]], i64 0, i64 0 // CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[ARRAYIDX]], i32 0, i32 2 -// CHECK-NEXT: store i8 0, ptr [[C1]], align 16, !tbaa [[TBAA25]] +// CHECK-NEXT: store i8 0, ptr [[C1]], align 16, !tbaa [[CHAR_TBAA25]] // CHECK-NEXT: ret void // void test_external() { @@ -60,8 +60,8 @@ void test_external() { // CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(17) [[S:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[TBAA23]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[TBAA23]], !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1BPTR_TBAA23]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8, !tbaa [[_ZTS1BPTR_TBAA23]], !nonnull [[META7]], !align [[META8]] // CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_B:%.*]], ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: ret ptr [[C]] // @@ -69,30 +69,30 @@ char* test_deref_only(B &s) { return &s.c; } //. 
-// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[_ZTS1SPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK: [[META3]] = !{!"p1 _ZTS1S", [[META4:![0-9]+]], i64 0} // CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0} // CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // CHECK: [[META6]] = !{!"Simple C++ TBAA"} // CHECK: [[META7]] = !{} // CHECK: [[META8]] = !{i64 8} -// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META11:![0-9]+]], i64 0} +// CHECK: [[CHARPTR_TBAA9]] = !{[[META10:![0-9]+]], [[META11:![0-9]+]], i64 0} // CHECK: [[META10]] = !{!"_ZTS1S", [[META11]], i64 0, [[META12:![0-9]+]], i64 8, [[META13:![0-9]+]], i64 16} // CHECK: [[META11]] = !{!"p1 omnipotent char", [[META4]], i64 0} // CHECK: [[META12]] = !{!"p1 int", [[META4]], i64 0} // CHECK: [[META13]] = !{!"p1 _ZTS1F", [[META4]], i64 0} -// CHECK: [[TBAA14]] = !{[[META5]], [[META5]], i64 0} -// CHECK: [[TBAA15]] = !{[[META10]], [[META12]], i64 8} +// CHECK: [[CHAR_TBAA14]] = !{[[META5]], [[META5]], i64 0} +// CHECK: [[INTPTR_TBAA15]] = !{[[META10]], [[META12]], i64 8} // CHECK: [[META16]] = !{i64 4} -// CHECK: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECK: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} // CHECK: [[META18]] = !{!"int", [[META5]], i64 0} -// CHECK: [[TBAA19]] = !{[[META10]], [[META13]], i64 16} +// CHECK: [[_ZTS1FPTR_TBAA19]] = !{[[META10]], [[META13]], i64 16} // CHECK: [[META20]] = !{i64 32} -// CHECK: [[TBAA21]] = !{[[META22:![0-9]+]], [[META18]], i64 0} +// CHECK: [[INT_TBAA21]] = !{[[META22:![0-9]+]], [[META18]], i64 0} // CHECK: [[META22]] = !{!"_ZTS1F", [[META18]], i64 0} -// CHECK: [[TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0} +// CHECK: [[_ZTS1BPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0} // CHECK: [[META24]] = !{!"p1 _ZTS1B", [[META4]], i64 0} -// CHECK: [[TBAA25]] = !{[[META26:![0-9]+]], [[META5]], i64 16} +// CHECK: [[CHAR_TBAA25]] = !{[[META26:![0-9]+]], [[META5]], 
i64 16} // CHECK: [[META26]] = !{!"_ZTS1B", [[META27:![0-9]+]], i64 8, [[META5]], i64 16} // CHECK: [[META27]] = !{!"long long", [[META5]], i64 0} //. diff --git a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl index f9d7968fc5570..b55f663d6d948 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s @@ -11,9 +11,9 @@ AA getAA(void *p); __amdgpu_buffer_rsrc_t getBufferImpl(void *p); void consumeBuffer(__amdgpu_buffer_rsrc_t); -// CHECK-LABEL: define {{[^@]+}}@getBuffer -// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local ptr addrspace(8) @getBuffer( +// CHECK-SAME: ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CALL:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret ptr addrspace(8) [[CALL]] // @@ -21,16 +21,16 @@ __amdgpu_buffer_rsrc_t getBuffer(void *p) { return getBufferImpl(p); } -// CHECK-LABEL: define {{[^@]+}}@consumeBufferPtr -// CHECK-SAME: (ptr addrspace(5) noundef readonly captures(address) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @consumeBufferPtr( +// CHECK-SAME: ptr addrspace(5) noundef readonly captures(address) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(5) [[P]], addrspacecast (ptr 
null to ptr addrspace(5)) -// CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -// CHECK: if.then: -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[__AMDGPU_BUFFER_RSRC_T_TBAA4:![0-9]+]] // CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP0]]) #[[ATTR2]] -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: // CHECK-NEXT: ret void // void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) { @@ -38,20 +38,20 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) { consumeBuffer(*p); } -// CHECK-LABEL: define {{[^@]+}}@test -// CHECK-SAME: (ptr addrspace(5) noundef readonly captures(address) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]] +// CHECK-LABEL: define dso_local void @test( +// CHECK-SAME: ptr addrspace(5) noundef readonly captures(address) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[INT_TBAA8:![0-9]+]] // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5)) // CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]] -// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]] -// CHECK: if.then.i: +// CHECK-NEXT: br i1 [[OR_COND]], label %[[IF_END:.*]], label %[[IF_THEN_I:.*]] +// CHECK: [[IF_THEN_I]]: // CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16 -// CHECK-NEXT: 
[[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa [[__AMDGPU_BUFFER_RSRC_T_TBAA4]] // CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP1]]) #[[ATTR2]] -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: +// CHECK-NEXT: br label %[[IF_END]] +// CHECK: [[IF_END]]: // CHECK-NEXT: ret void // void test(AA *a) { @@ -59,18 +59,18 @@ void test(AA *a) { consumeBufferPtr(&(a->r)); } -// CHECK-LABEL: define {{[^@]+}}@bar -// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local %struct.AA_ty @bar( +// CHECK-SAME: ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CALL:%.*]] = tail call [[STRUCT_AA_TY:%.*]] @[[GETAA:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr addrspace(5) noundef [[P]]) #[[ATTR2]] // CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_AA_TY]] [[CALL]], 0 // CHECK-NEXT: [[CALL_I:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2]] // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 -// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[TEST_EXIT:%.*]], label [[IF_THEN_I_I:%.*]] -// CHECK: if.then.i.i: +// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[TEST_EXIT:.*]], label %[[IF_THEN_I_I:.*]] +// CHECK: [[IF_THEN_I_I]]: // CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[CALL_I]]) #[[ATTR2]] -// CHECK-NEXT: br label [[TEST_EXIT]] -// CHECK: test.exit: +// CHECK-NEXT: br label %[[TEST_EXIT]] +// CHECK: [[TEST_EXIT]]: // CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_AA_TY]] [[CALL]], ptr addrspace(8) [[CALL_I]], 1 // CHECK-NEXT: ret [[STRUCT_AA_TY]] [[DOTFCA_1_INSERT]] // @@ -80,3 +80,12 @@ AA bar(void *p) { test(&a); return a; } +//. 
+// CHECK: [[__AMDGPU_BUFFER_RSRC_T_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK: [[META5]] = !{!"__amdgpu_buffer_rsrc_t", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[INT_TBAA8]] = !{[[META9:![0-9]+]], [[META10:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"AA_ty", [[META10]], i64 0, [[META5]], i64 16} +// CHECK: [[META10]] = !{!"int", [[META6]], i64 0} +//. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index d71c89811f04b..6d573238440d2 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 6 // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU // // Check no-optnone and target-cpu behavior @@ -451,13 +451,13 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr // GFX900-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3:![0-9]+]] -// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]] -// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr 
[[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7:![0-9]+]] +// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]] -// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: ret void // // @@ -473,14 +473,14 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr // GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr // GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]] -// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]] -// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] -// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] -// 
GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14:![0-9]+]] +// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]] // GFX900-NEXT: ret void // @@ -519,16 +519,16 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr // GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr // GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] -// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] -// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr 
[[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] -// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17:![0-9]+]] +// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17:![0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] -// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19:![0-9]+]] -// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19:![0-9]+]] +// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] // GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 @@ -537,14 +537,14 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ 
i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[CHARPTR_TBAA14]] // GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] -// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr [[BLOCK_ASCAST]]) -// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] +// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK3_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 @@ -553,20 +553,20 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[CHARPTR_TBAA14]] // GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6 -// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] -// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr 
[[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[LONGPTR_TBAA7]] // GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5 -// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr [[BLOCK3_ASCAST]]) -// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] +// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 @@ -575,17 +575,17 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA14]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[CHARPTR_TBAA14]] // GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6 -// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] -// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[LONGPTR_TBAA7]] // GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), 
i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 -// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] // GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 // GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8 @@ -599,16 +599,16 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2 // GFX900-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3 -// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 -// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]] -// GFX900-NEXT: 
[[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[LONGPTR_TBAA7]] +// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA16]] +// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] +// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] -// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]]) // GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] @@ -623,8 +623,8 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26:![0-9]+]] -// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]] +// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA26:![0-9]+]] +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA26]] // GFX900-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR8]] // GFX900-NEXT: ret void // @@ -640,14 +640,14 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) // GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr // GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]] +// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[INTPTR_TBAA26]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] -// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[QUEUE_T_TBAA19]] +// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[INT_TBAA17]] // GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr 
addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) // GFX900-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] @@ -664,11 +664,11 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 -// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]] +// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: ret void // // @@ -691,17 +691,17 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 -// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]] +// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 -// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa 
[[LONGPTR_TBAA7]] // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 -// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: ret void // // @@ -725,22 +725,22 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 -// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[ANYPTR_TBAA32:![0-9]+]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[CHARPTR_TBAA14]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 -// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]] +// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 
1, !tbaa [[CHAR_TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 -// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[LONGPTR_TBAA7]] // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 -// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32]] +// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[LONG_TBAA3]] +// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[ANYPTR_TBAA32]] // GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0 -// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[INT_TBAA17]] // GFX900-NEXT: ret void // // @@ -763,9 +763,9 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw 
<{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 -// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[LONG_TBAA3]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[LONGPTR_TBAA7]] // GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]] // GFX900-NEXT: ret void // @@ -852,36 +852,36 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0} -// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// GFX900: [[LONG_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} // GFX900: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} // GFX900: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} // GFX900: [[META6]] = !{!"Simple C/C++ TBAA"} -// GFX900: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// GFX900: [[LONGPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} // GFX900: [[META8]] = !{!"p1 long", [[META9:![0-9]+]], i64 0} // GFX900: [[META9]] = !{!"any pointer", [[META5]], i64 0} // GFX900: [[META10]] = !{i32 1, i32 0, i32 1, i32 0} // GFX900: [[META11]] = !{!"none", !"none", !"none", !"none"} // GFX900: [[META12]] = !{!"char*", !"char", !"long*", !"long"} // GFX900: [[META13]] = !{!"", !"", !"", !""} -// GFX900: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// GFX900: [[CHARPTR_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} // 
GFX900: [[META15]] = !{!"p1 omnipotent char", [[META9]], i64 0} -// GFX900: [[TBAA16]] = !{[[META5]], [[META5]], i64 0} -// GFX900: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// GFX900: [[CHAR_TBAA16]] = !{[[META5]], [[META5]], i64 0} +// GFX900: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} // GFX900: [[META18]] = !{!"int", [[META5]], i64 0} -// GFX900: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} +// GFX900: [[QUEUE_T_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} // GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0} -// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[TBAA17]]} +// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[INT_TBAA17]]} // GFX900: [[META22]] = !{i32 1} // GFX900: [[META23]] = !{!"none"} // GFX900: [[META24]] = !{!"int*"} // GFX900: [[META25]] = !{!""} -// GFX900: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} +// GFX900: [[INTPTR_TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} // GFX900: [[META27]] = !{!"p1 int", [[META9]], i64 0} // GFX900: [[META28]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle} // GFX900: [[META29]] = !{i32 0} // GFX900: [[META30]] = !{!"__block_literal"} // GFX900: [[META31]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle} -// GFX900: [[TBAA32]] = !{[[META9]], [[META9]], i64 0} +// GFX900: [[ANYPTR_TBAA32]] = !{[[META9]], [[META9]], i64 0} // GFX900: [[META33]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle} // GFX900: [[META34]] = !{i32 0, i32 3} // GFX900: [[META35]] = !{!"none", !"none"} diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl index b9e25172a56af..cea7ee576d822 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-printf.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4 +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6 // RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); @@ -17,60 +17,60 @@ __kernel void test_printf_str_int(int i) { } // CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_noargs( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_noargs() #[[ATTR5:[0-9]+]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_noargs( // CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR6:[0-9]+]] +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) 
@printf(ptr addrspace(4) noundef @.str) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_int( // CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5:![0-9]+]] !kernel_arg_access_qual [[META6:![0-9]+]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8:![0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] // CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_int(i32 noundef [[TMP0]]) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_int( // CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) 
@printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR6]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_str_int( // CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] // CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_str_int(i32 noundef [[TMP0]]) #[[ATTR5]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_str_int( // CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5) -// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[S]]) #[[ATTR7:[0-9]+]] +// 
CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[S]]) #[[ATTR6:[0-9]+]] // CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false) // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[S]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[INT_TBAA9]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR5]] +// CHECK-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[S]]) #[[ATTR6]] // CHECK-NEXT: ret void // //. 
@@ -79,7 +79,7 @@ __kernel void test_printf_str_int(int i) { // CHECK: [[META6]] = !{!"none"} // CHECK: [[META7]] = !{!"int"} // CHECK: [[META8]] = !{!""} -// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK: [[INT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} // CHECK: [[META10]] = !{!"int", [[META11:![0-9]+]], i64 0} // CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0} // CHECK: [[META12]] = !{!"Simple C/C++ TBAA"} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl index cddc323cb27a5..321835cc3d28d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200 @@ -14,10 +14,11 @@ typedef int v8i __attribute__((ext_vector_type(8))); // amdgcn_wmma_f32_16x16x16_f16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// 
CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c) @@ -29,10 +30,11 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c) // amdgcn_wmma_f32_16x16x16_bf16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c) @@ -44,10 +46,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c // amdgcn_wmma_f16_16x16x16_f16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> 
@llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x half> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c) @@ -59,10 +62,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c) // amdgcn_wmma_bf16_16x16x16_bf16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], 
<8 x i16> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s c) @@ -74,10 +78,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s // amdgcn_wmma_i32_16x16x16_iu8 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c) @@ -89,10 +94,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c) // amdgcn_wmma_i32_16x16x16_iu4 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <8 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x 
i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <8 x i32> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c) @@ -100,10 +106,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c) *out = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12(true, a, true, b, c, false); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa 
[[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c) @@ -111,10 +118,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8 *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8f c) @@ -122,10 +130,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8 *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa 
[[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c) @@ -133,10 +142,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8 *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void 
test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8f c) @@ -144,13 +154,19 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8 *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x32_iu4_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v8i* out, v2i a, v2i b, v8i c) { *out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12(true, a, true, b, c, false); } +//. +// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl index 1c1d273eda771..8b5b31537ce58 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200 @@ -13,10 +13,11 @@ typedef int v4i __attribute__((ext_vector_type(4))); // amdgcn_wmma_f32_16x16x16_f16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c) @@ -28,10 +29,11 @@ void 
test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c) // amdgcn_wmma_f32_16x16x16_bf16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v4s a, v4s b, v4f c) @@ -43,10 +45,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v4s a, v4s b, v4f c // amdgcn_wmma_f16_16x16x16_f16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x half> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef 
[[B:%.*]], <4 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x half> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c) @@ -58,10 +61,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c) // amdgcn_wmma_bf16_16x16x16_bf16 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x i16> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s c) @@ -73,10 +77,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s // amdgcn_wmma_i32_16x16x16_iu8 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w64( -// CHECK-GFX1200-NEXT: 
entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c) @@ -88,10 +93,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c) // amdgcn_wmma_i32_16x16x16_iu4 // -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 
true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c) @@ -99,10 +105,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c) *out = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12(true, a, true, b, c, false); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4f c) @@ -110,10 +117,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4 *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> 
[[C:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4f c) @@ -121,10 +129,11 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4 *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // 
CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4f c) @@ -132,10 +141,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4 *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4f c) @@ -143,13 +153,19 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4 *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12(a, b, c); } -// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void 
@test_amdgcn_wmma_i32_16x16x32_iu4_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false) +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v4i* out, int a, int b, v4i c) { *out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12(true, a, true, b, c, false); } +//. +// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl index c645d52cc7e38..e03ae66f92035 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl @@ -1,13 +1,14 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 typedef int v2i __attribute__((ext_vector_type(2))); typedef int v4i __attribute__((ext_vector_type(4))); -// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b8( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void 
@llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]]) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b8( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]]) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_cluster_load_async_to_lds_b8(global char* gaddr, local char* laddr, int mask) @@ -15,9 +16,10 @@ void test_amdgcn_cluster_load_async_to_lds_b8(global char* gaddr, local char* la __builtin_amdgcn_cluster_load_async_to_lds_b8(gaddr, laddr, 16, 0, mask); } -// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b32( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]]) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b32( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]]) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_cluster_load_async_to_lds_b32(global int* gaddr, local int* laddr, int mask) @@ -25,9 +27,10 @@ void test_amdgcn_cluster_load_async_to_lds_b32(global int* gaddr, local int* lad 
__builtin_amdgcn_cluster_load_async_to_lds_b32(gaddr, laddr, 16, 0, mask); } -// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b64( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]]) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b64( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]]) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_cluster_load_async_to_lds_b64(global v2i* gaddr, local v2i* laddr, int mask) @@ -35,9 +38,10 @@ void test_amdgcn_cluster_load_async_to_lds_b64(global v2i* gaddr, local v2i* lad __builtin_amdgcn_cluster_load_async_to_lds_b64(gaddr, laddr, 16, 0, mask); } -// CHECK-GFX1250-LABEL: @test_amdgcn_cluster_load_async_to_lds_b128( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0, i32 [[MASK:%.*]]) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_cluster_load_async_to_lds_b128( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]], i32 noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0, i32 [[MASK]]) // CHECK-GFX1250-NEXT: ret void // void 
test_amdgcn_cluster_load_async_to_lds_b128(global v4i* gaddr, local v4i* laddr, int mask) @@ -45,9 +49,10 @@ void test_amdgcn_cluster_load_async_to_lds_b128(global v4i* gaddr, local v4i* la __builtin_amdgcn_cluster_load_async_to_lds_b128(gaddr, laddr, 16, 0, mask); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b8( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b8( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_load_async_to_lds_b8( global char* gaddr, local char* laddr) @@ -55,9 +60,10 @@ void test_amdgcn_global_load_async_to_lds_b8( global char* gaddr, local char* la __builtin_amdgcn_global_load_async_to_lds_b8(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b32( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b32( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 
0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_load_async_to_lds_b32(global int* gaddr, local int* laddr) @@ -65,9 +71,10 @@ void test_amdgcn_global_load_async_to_lds_b32(global int* gaddr, local int* ladd __builtin_amdgcn_global_load_async_to_lds_b32(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b64( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b64( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_load_async_to_lds_b64(global v2i* gaddr, local v2i* laddr) @@ -75,9 +82,10 @@ void test_amdgcn_global_load_async_to_lds_b64(global v2i* gaddr, local v2i* ladd __builtin_amdgcn_global_load_async_to_lds_b64(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b128( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_load_async_to_lds_b128( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef writeonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) [[GADDR]], ptr 
addrspace(3) [[LADDR]], i32 16, i32 0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_load_async_to_lds_b128( global v4i* gaddr, local v4i* laddr) @@ -85,9 +93,10 @@ void test_amdgcn_global_load_async_to_lds_b128( global v4i* gaddr, local v4i* la __builtin_amdgcn_global_load_async_to_lds_b128(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b8( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b8( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_store_async_from_lds_b8(global char* gaddr, local char* laddr) @@ -95,9 +104,10 @@ void test_amdgcn_global_store_async_from_lds_b8(global char* gaddr, local char* __builtin_amdgcn_global_store_async_from_lds_b8(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b32( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b32( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void 
@llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_store_async_from_lds_b32(global int* gaddr, local int* laddr) @@ -105,9 +115,10 @@ void test_amdgcn_global_store_async_from_lds_b32(global int* gaddr, local int* l __builtin_amdgcn_global_store_async_from_lds_b32(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b64( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b64( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_store_async_from_lds_b64(global v2i* gaddr, local v2i* laddr) @@ -115,9 +126,10 @@ void test_amdgcn_global_store_async_from_lds_b64(global v2i* gaddr, local v2i* l __builtin_amdgcn_global_store_async_from_lds_b64(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b128( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_global_store_async_from_lds_b128( +// CHECK-GFX1250-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], ptr addrspace(3) noundef readonly captures(none) [[LADDR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) [[GADDR]], ptr addrspace(3) [[LADDR]], i32 16, i32 0) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_global_store_async_from_lds_b128(global v4i* gaddr, local v4i* laddr) @@ -125,9 +137,10 @@ void test_amdgcn_global_store_async_from_lds_b128(global v4i* gaddr, local v4i* __builtin_amdgcn_global_store_async_from_lds_b128(gaddr, laddr, 16, 0); } -// CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_async_barrier_arrive_b64( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) [[ADDR:%.*]]) +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_ds_atomic_async_barrier_arrive_b64( +// CHECK-GFX1250-SAME: ptr addrspace(3) noundef [[ADDR:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-GFX1250-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) [[ADDR]]) // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_ds_atomic_async_barrier_arrive_b64(local long* addr) @@ -135,13 +148,20 @@ void test_amdgcn_ds_atomic_async_barrier_arrive_b64(local long* addr) __builtin_amdgcn_ds_atomic_async_barrier_arrive_b64(addr); } -// CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_barrier_arrive_rtn_b64( -// CHECK-GFX1250-NEXT: entry: -// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) [[ADDR:%.*]], i64 [[DATA:%.*]]) -// CHECK-GFX1250-NEXT: store i64 [[TMP0]], ptr [[OUT:%.*]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1250-LABEL: define dso_local void @test_amdgcn_ds_atomic_barrier_arrive_rtn_b64( +// CHECK-GFX1250-SAME: ptr addrspace(3) noundef captures(none) [[ADDR:%.*]], i64 noundef [[DATA:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { +// CHECK-GFX1250-NEXT: 
[[ENTRY:.*:]] +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) [[ADDR]], i64 [[DATA]]) +// CHECK-GFX1250-NEXT: store i64 [[TMP0]], ptr [[OUT]], align 8, !tbaa [[LONG_TBAA4:![0-9]+]] // CHECK-GFX1250-NEXT: ret void // void test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(local long* addr, long data, long *out) { *out = __builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64(addr, data); } +//. +// CHECK-GFX1250: [[LONG_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK-GFX1250: [[META5]] = !{!"long", [[META6:![0-9]+]], i64 0} +// CHECK-GFX1250: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// CHECK-GFX1250: [[META7]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl index 9927bb334c486..214390142b6aa 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200 @@ -13,10 +13,11 @@ typedef short v16s __attribute__((ext_vector_type(16))); // Wave32 -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1200-LABEL: define dso_local void 
@test_amdgcn_swmmac_f32_16x16x32_f16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> [[A]], <16 x half> [[B]], <8 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, int index) @@ -24,10 +25,11 @@ void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> [[A]], <16 x i16> [[B]], <8 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], 
ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, int index) @@ -35,10 +37,11 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8 *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f16_16x16x32_f16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, int index) @@ -46,10 +49,11 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> [[A:%.*]], 
<16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_bf16_16x16x32_bf16_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, int index) @@ -57,10 +61,11 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: 
[[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true) +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, int index) @@ -68,10 +73,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a, true, b, c, index, true); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu4_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true) +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, int index) @@ -79,10 +85,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i *out = 
__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, a, true, b, c, index, true); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) -// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x64_iu4_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i32 [[INDEX]], i1 true) +// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, int index) @@ -90,10 +97,11 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i *out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a, true, b, c, index, true); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void 
@test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) @@ -101,10 +109,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], 
ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) @@ -112,10 +121,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) @@ -123,13 +133,19 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> 
@llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A]], <4 x i32> [[B]], <8 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a, b, c, index); } +//. +// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl index eaa6b14d2a792..47753afd1aa52 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200 @@ -12,10 +12,11 @@ typedef short v8s __attribute__((ext_vector_type(8))); // Wave64 -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_f16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> [[A]], <8 x half> [[B]], <4 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, 
v8h b, v4f c, int index) @@ -23,10 +24,11 @@ void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> [[A]], <8 x i16> [[B]], <4 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f c, int index) @@ -34,10 +36,11 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x half> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa 
[[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f16_16x16x32_f16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> [[A]], <8 x half> [[B]], <4 x half> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h c, int index) @@ -45,10 +48,11 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_bf16_16x16x32_bf16_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> [[A]], <8 x i16> [[B]], <4 x i16> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x 
i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4s c, int index) @@ -56,10 +60,11 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4 *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu8_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true) +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i c, int index) @@ -67,10 +72,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64(true, a, true, b, c, index, true); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x32_iu4_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true) +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i c, int index) @@ -78,10 +84,11 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64(true, a, true, b, c, index, true); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) -// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_i32_16x16x64_iu4_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 true, i32 [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i32 [[INDEX]], i1 true) +// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i c, int index) @@ -89,10 +96,11 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i *out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64(true, a, true, b, c, index, true); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) @@ -100,10 +108,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, 
*out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) @@ -111,10 +120,11 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64( +// CHECK-GFX1200-SAME: ptr 
addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) @@ -122,13 +132,19 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(a, b, c, index); } -// CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64( -// CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) -// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64( +// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1200-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 [[A]], <2 x i32> [[B]], <4 x float> [[C]], i32 [[INDEX]]) +// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1200-NEXT: ret void // void 
test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(a, b, c, index); } +//. +// CHECK-GFX1200: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK-GFX1200: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-GFX1200: [[META6]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl index 2f9a367ecab8a..853cd32f8bdce 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -DWMMA_GFX1100_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1100 @@ -17,10 +17,11 @@ typedef short v16s __attribute__((ext_vector_type(16))); // amdgcn_wmma_f32_16x16x16_f16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> 
@llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x float> [[C]]) +// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4:![0-9]+]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v16h a, v16h b, v8f c) @@ -32,10 +33,11 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v16h a, v16h b, v8f // amdgcn_wmma_f32_16x16x16_bf16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> [[C:%.*]]) -// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x float> [[C]]) +// CHECK-GFX1100-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v16s a, v16s b, v8f c) @@ -47,10 +49,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v16s a, v16s b, v8f // amdgcn_wmma_f16_16x16x16_f16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <16 
x half> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <16 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <16 x half> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v16h* out, v16h a, v16h b, v16h c) @@ -62,10 +65,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v16h* out, v16h a, v16h b, v16 // amdgcn_wmma_bf16_16x16x16_bf16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v16i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i16> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <16 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v16i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <16 x i16> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <16 x i16> 
[[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v16s* out, v16s a, v16s b, v16s c) @@ -77,10 +81,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v16s* out, v16s a, v16s b, v // amdgcn_wmma_f16_16x16x16_f16_tied // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_tied_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v16f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <16 x half> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_tied_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <16 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v16f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <16 x half> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <16 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(global v16h* out, v16h a, v16h b, v16h c) @@ -92,10 +97,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_tied_w32(global v16h* out, v16h a, v16h b // amdgcn_wmma_bf16_16x16x16_bf16_tied // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v16i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i16> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], 
ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <16 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v16i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <16 x i16> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <16 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(global v16s* out, v16s a, v16s b, v16s c) @@ -107,10 +113,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(global v16s* out, v16s a, v16s // amdgcn_wmma_i32_16x16x16_iu8 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 true, <4 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 true, <4 x i32> [[A]], i1 true, <4 x i32> [[B]], <8 x i32> [[C]], i1 false) +// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa 
[[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v4i a, v4i b, v8i c) @@ -122,10 +129,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v4i a, v4i b, v8i c) // amdgcn_wmma_i32_16x16x16_iu4 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w32( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w32( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false) +// CHECK-GFX1100-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, v2i a, v2i b, v8i c) @@ -134,3 +142,8 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, v2i a, v2i b, v8i c) } #endif +//. +// CHECK-GFX1100: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK-GFX1100: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-GFX1100: [[META6]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl index 8dfe69bb9a744..9b6872f6b1e6d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize64 -DWMMA_GFX1100_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1100 @@ -18,10 +18,11 @@ typedef short v16s __attribute__((ext_vector_type(16))); // amdgcn_wmma_f32_16x16x16_f16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w64( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <4 x float> [[C:%.*]]) -// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> [[A]], <16 x half> [[B]], <4 x float> [[C]]) +// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4:![0-9]+]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v16h a, v16h b, v4f c) @@ -33,10 +34,11 @@ void 
test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v16h a, v16h b, v4f // amdgcn_wmma_f32_16x16x16_bf16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w64( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <4 x float> [[C:%.*]]) -// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <4 x float> [[C]]) +// CHECK-GFX1100-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v16s a, v16s b, v4f c) @@ -48,10 +50,11 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v16s a, v16s b, v4f // amdgcn_wmma_f16_16x16x16_f16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_f16_16x16x16_f16_w64( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x 
half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v8h* out, v16h a, v16h b, v8h c) @@ -63,10 +66,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v8h* out, v16h a, v16h b, v8h // amdgcn_wmma_bf16_16x16x16_bf16 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_w64( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v8s* out, v16s a, v16s b, v8s c) @@ -78,10 +82,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v8s* out, v16s a, v16s b, v8 // amdgcn_wmma_f16_16x16x16_f16_tied // -// CHECK-GFX1100-LABEL: 
@test_amdgcn_wmma_f16_16x16x16_f16_tied_w64( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v8f16.v16f16(<16 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_tied_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x half> noundef [[A:%.*]], <16 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v8f16.v16f16(<16 x half> [[A]], <16 x half> [[B]], <8 x half> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(global v8h* out, v16h a, v16h b, v8h c) @@ -93,10 +98,11 @@ void test_amdgcn_wmma_f16_16x16x16_f16_tied_w64(global v8h* out, v16h a, v16h b, // amdgcn_wmma_bf16_16x16x16_bf16_tied // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v8i16.v16i16(<16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i1 true) -// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <16 x i16> noundef [[A:%.*]], <16 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v8i16.v16i16(<16 x i16> [[A]], <16 x i16> [[B]], <8 x i16> [[C]], i1 true) +// CHECK-GFX1100-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(global v8s* out, v16s a, v16s b, v8s c) @@ -108,10 +114,11 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(global v8s* out, v16s a, v16s // amdgcn_wmma_i32_16x16x16_iu8 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w64( -// CHECK-GFX1100-NEXT: entry: -// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v4i32(i1 true, <4 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v4i32(i1 true, <4 x i32> [[A]], i1 true, <4 x i32> [[B]], <4 x i32> [[C]], i1 false) +// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, v4i a, v4i b, v4i c) @@ -123,10 +130,11 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, v4i a, v4i b, v4i c) // amdgcn_wmma_i32_16x16x16_iu4 // -// CHECK-GFX1100-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w64( -// CHECK-GFX1100-NEXT: entry: -// 
CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i1 false) -// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1100-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w64( +// CHECK-GFX1100-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-GFX1100-NEXT: [[ENTRY:.*:]] +// CHECK-GFX1100-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <4 x i32> [[C]], i1 false) +// CHECK-GFX1100-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA4]] // CHECK-GFX1100-NEXT: ret void // void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, v2i a, v2i b, v4i c) @@ -135,3 +143,8 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, v2i a, v2i b, v4i c) } #endif +//. +// CHECK-GFX1100: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK-GFX1100: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-GFX1100: [[META6]] = !{!"Simple C/C++ TBAA"} +//. 
diff --git a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl index 4e40073c7e27a..4f2a75a76abbb 100644 --- a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl +++ b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6 // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s // Check there's no assertion when passing a pointer to an address space @@ -33,7 +33,7 @@ __kernel void use_of_local_var() // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[X]]) #[[ATTR5:[0-9]+]] -// CHECK-NEXT: store i32 0, ptr addrspace(5) [[X]], align 4, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr addrspace(5) [[X]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] // CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X]]) #[[ATTR6:[0-9]+]] // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr // CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR6]] @@ -46,7 +46,7 @@ __kernel void use_of_local_var() // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr -// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4, !tbaa [[TBAA4]] +// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4, !tbaa [[INT_TBAA4]] // CHECK-NEXT: [[X_ADDR_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR_ASCAST]] to ptr addrspace(5) // CHECK-NEXT: call void @private_ptr(ptr 
addrspace(5) noundef [[X_ADDR_ASCAST_ASCAST]]) #[[ATTR6]] // CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ADDR_ASCAST]]) #[[ATTR6]] @@ -68,7 +68,7 @@ __kernel void use_of_local_var() // CHECK-NEXT: ret void // //. -// CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} // CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} // CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} // CHECK: [[META7]] = !{!"Simple C/C++ TBAA"} diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl index e73657e30d884..6e5c1c49504ec 100644 --- a/clang/test/CodeGenOpenCL/preserve_vec3.cl +++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s typedef char char3 __attribute__((ext_vector_type(3))); @@ -12,8 +12,8 @@ typedef float float4 __attribute__((ext_vector_type(4))); // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> 
[[TMP0]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]] // CHECK-NEXT: ret void // void kernel foo(global float3 *a, global float3 *b) { @@ -23,9 +23,9 @@ void kernel foo(global float3 *a, global float3 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3( // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: ret void // void kernel float4_to_float3(global float3 *a, global float4 *b) { @@ -36,8 +36,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) { // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: 
[[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: ret void // void kernel float3_to_float4(global float3 *a, global float4 *b) { @@ -49,7 +49,7 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: ret void // void kernel float3_to_double2(global float3 *a, global double2 *b) { @@ -59,9 +59,9 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) { // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3( // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]] +// 
CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[CHAR_TBAA8]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: ret void // void kernel char8_to_short3(global short3 *a, global char8 *b) { @@ -69,10 +69,10 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) { } // CHECK-LABEL: define dso_local spir_func void @from_char3( -// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> -// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA17:![0-9]+]] // CHECK-NEXT: ret void // void from_char3(char3 a, global int *out) { @@ -80,10 +80,10 @@ void from_char3(char3 a, global int *out) { } // CHECK-LABEL: define dso_local spir_func void @from_short3( -// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]] +// CHECK-NEXT: store <4 x i16> 
[[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA19:![0-9]+]] // CHECK-NEXT: ret void // void from_short3(short3 a, global long *out) { @@ -91,11 +91,11 @@ void from_short3(short3 a, global long *out) { } // CHECK-LABEL: define dso_local spir_func void @scalar_to_char3( -// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8> // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> -// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: ret void // void scalar_to_char3(int a, global char3 *out) { @@ -103,11 +103,11 @@ void scalar_to_char3(int a, global char3 *out) { } // CHECK-LABEL: define dso_local spir_func void @scalar_to_short3( -// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16> // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]] // CHECK-NEXT: ret void // void scalar_to_short3(long a, global short3 *out) { @@ -120,7 +120,7 @@ 
void scalar_to_short3(long a, global short3 *out) { // CHECK: [[META5]] = !{!"float3*", !"float3*"} // CHECK: [[META6]] = !{!"float __attribute__((ext_vector_type(3)))*", !"float __attribute__((ext_vector_type(3)))*"} // CHECK: [[META7]] = !{!"", !""} -// CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[CHAR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} // CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0} // CHECK: [[META10]] = !{!"Simple C/C++ TBAA"} // CHECK: [[META11]] = !{!"float3*", !"float4*"} @@ -129,8 +129,8 @@ void scalar_to_short3(long a, global short3 *out) { // CHECK: [[META14]] = !{!"float __attribute__((ext_vector_type(3)))*", !"double __attribute__((ext_vector_type(2)))*"} // CHECK: [[META15]] = !{!"short3*", !"char8*"} // CHECK: [[META16]] = !{!"short __attribute__((ext_vector_type(3)))*", !"char __attribute__((ext_vector_type(8)))*"} -// CHECK: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECK: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} // CHECK: [[META18]] = !{!"int", [[META9]], i64 0} -// CHECK: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} +// CHECK: [[LONG_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} // CHECK: [[META20]] = !{!"long", [[META9]], i64 0} //. 
diff --git a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp index 8d8f0b0b5d699..e932e75d025e0 100644 --- a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp +++ b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp @@ -1,12 +1,12 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 //RUN: %clang_cc1 %s -triple spir -emit-llvm -O1 -o - | FileCheck %s // CHECK-LABEL: define dso_local spir_kernel void @test( // CHECK-SAME: ptr addrspace(1) noundef readonly align 8 captures(none) [[IN:%.*]], ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[IN]], i32 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1]], align 8, !tbaa [[TBAA8:![0-9]+]] -// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]] +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[IN]], i32 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1_I]], align 8, !tbaa [[LONG_TBAA8:![0-9]+]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA8]] // CHECK-NEXT: ret void // __kernel void test(__global long *In, __global long *Out) { @@ -18,7 +18,7 @@ __kernel void test(__global long *In, __global long *Out) { // CHECK: [[META5]] = !{!"none", !"none"} // CHECK: [[META6]] = !{!"long*", !"long*"} // CHECK: [[META7]] = !{!"", !""} -// CHECK: 
[[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[LONG_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} // CHECK: [[META9]] = !{!"long", [[META10:![0-9]+]], i64 0} // CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} // CHECK: [[META11]] = !{!"Simple C++ TBAA"} diff --git a/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c b/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c index 88e691d65334c..6ca17e1f9f285 100644 --- a/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c +++ b/clang/test/DebugInfo/Generic/unsigned-promotion-debuginfo.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -O2 -triple x86_64-linux-gnu -emit-llvm -o - %s \ // RUN: -fdebug-prefix-map=%S/= -fno-ident -fdebug-compilation-dir=%S -debug-info-kind=limited \ // RUN: -fsanitize-annotate-debug-info=signed-integer-overflow \ @@ -14,9 +14,9 @@ unsigned short si, sj, sk; // CHECKS-LABEL: define dso_local void @testshortmul( // CHECKS-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECKS-NEXT: [[ENTRY:.*:]] -// CHECKS-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[TBAA17:![0-9]+]] +// CHECKS-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[SHORT_TBAA17:![0-9]+]] // CHECKS-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i32, !dbg [[DBG16]] -// CHECKS-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[TBAA17]] +// CHECKS-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[SHORT_TBAA17]] // CHECKS-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32, !dbg [[DBG21]] // CHECKS-NEXT: [[TMP2:%.*]] = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[CONV]], i32 [[CONV1]]), !dbg [[DBG22:![0-9]+]], !nosanitize 
[[META26:![0-9]+]] // CHECKS-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !dbg [[DBG22]], !nosanitize [[META26]] @@ -29,16 +29,16 @@ unsigned short si, sj, sk; // CHECKS: [[CONT]]: // CHECKS-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !dbg [[DBG22]], !nosanitize [[META26]] // CHECKS-NEXT: [[CONV2:%.*]] = trunc i32 [[TMP6]] to i16, !dbg [[DBG16]] -// CHECKS-NEXT: store i16 [[CONV2]], ptr @si, align 2, !dbg [[DBG28:![0-9]+]], !tbaa [[TBAA17]] +// CHECKS-NEXT: store i16 [[CONV2]], ptr @si, align 2, !dbg [[DBG28:![0-9]+]], !tbaa [[SHORT_TBAA17]] // CHECKS-NEXT: ret void, !dbg [[DBG29:![0-9]+]] // // CHECKU-LABEL: define dso_local void @testshortmul( // CHECKU-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECKU-NEXT: [[ENTRY:.*:]] -// CHECKU-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[TBAA17:![0-9]+]] -// CHECKU-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[TBAA17]] +// CHECKU-NEXT: [[TMP0:%.*]] = load i16, ptr @sj, align 2, !dbg [[DBG16:![0-9]+]], !tbaa [[SHORT_TBAA17:![0-9]+]] +// CHECKU-NEXT: [[TMP1:%.*]] = load i16, ptr @sk, align 2, !dbg [[DBG21:![0-9]+]], !tbaa [[SHORT_TBAA17]] // CHECKU-NEXT: [[MUL:%.*]] = mul i16 [[TMP1]], [[TMP0]], !dbg [[DBG22:![0-9]+]] -// CHECKU-NEXT: store i16 [[MUL]], ptr @si, align 2, !dbg [[DBG23:![0-9]+]], !tbaa [[TBAA17]] +// CHECKU-NEXT: store i16 [[MUL]], ptr @si, align 2, !dbg [[DBG23:![0-9]+]], !tbaa [[SHORT_TBAA17]] // CHECKU-NEXT: ret void, !dbg [[DBG24:![0-9]+]] // void testshortmul(void) { @@ -50,7 +50,7 @@ void testshortmul(void) { // CHECKS: [[META0:![0-9]+]] = !DIGlobalVariableExpression(var: [[META1:![0-9]+]], expr: !DIExpression()) // CHECKS: [[META1]] = distinct !DIGlobalVariable(name: "sj", scope: [[META2:![0-9]+]], file: [[META7:![0-9]+]], line: 12, type: [[META8:![0-9]+]], isLocal: false, isDefinition: true) // CHECKS: [[META2]] = distinct !DICompileUnit(language: DW_LANG_C11, file: 
[[META3:![0-9]+]], isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: [[META4:![0-9]+]], splitDebugInlining: false, nameTableKind: None) -// CHECKS: [[META3]] = !DIFile(filename: "", directory: {{.*}}) +// CHECKS: [[META3]] = !DIFile(filename: "{{.*}}", directory: {{.*}}) // CHECKS: [[META4]] = !{[[META5:![0-9]+]], [[META0]], [[META9:![0-9]+]]} // CHECKS: [[META5]] = !DIGlobalVariableExpression(var: [[META6:![0-9]+]], expr: !DIExpression()) // CHECKS: [[META6]] = distinct !DIGlobalVariable(name: "si", scope: [[META2]], file: [[META7]], line: 12, type: [[META8]], isLocal: false, isDefinition: true) @@ -62,7 +62,7 @@ void testshortmul(void) { // CHECKS: [[META14]] = !DISubroutineType(types: [[META15:![0-9]+]]) // CHECKS: [[META15]] = !{null} // CHECKS: [[DBG16]] = !DILocation(line: 47, column: 8, scope: [[DBG13]]) -// CHECKS: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECKS: [[SHORT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} // CHECKS: [[META18]] = !{!"short", [[META19:![0-9]+]], i64 0} // CHECKS: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0} // CHECKS: [[META20]] = !{!"Simple C/C++ TBAA"} @@ -79,7 +79,7 @@ void testshortmul(void) { // CHECKU: [[META0:![0-9]+]] = !DIGlobalVariableExpression(var: [[META1:![0-9]+]], expr: !DIExpression()) // CHECKU: [[META1]] = distinct !DIGlobalVariable(name: "sj", scope: [[META2:![0-9]+]], file: [[META7:![0-9]+]], line: 12, type: [[META8:![0-9]+]], isLocal: false, isDefinition: true) // CHECKU: [[META2]] = distinct !DICompileUnit(language: DW_LANG_C11, file: [[META3:![0-9]+]], isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: [[META4:![0-9]+]], splitDebugInlining: false, nameTableKind: None) -// CHECKU: [[META3]] = !DIFile(filename: "", directory: {{.*}}) +// CHECKU: [[META3]] = !DIFile(filename: "{{.*}}", directory: {{.*}}) // CHECKU: [[META4]] = !{[[META5:![0-9]+]], [[META0]], [[META9:![0-9]+]]} // CHECKU: [[META5]] = 
!DIGlobalVariableExpression(var: [[META6:![0-9]+]], expr: !DIExpression()) // CHECKU: [[META6]] = distinct !DIGlobalVariable(name: "si", scope: [[META2]], file: [[META7]], line: 12, type: [[META8]], isLocal: false, isDefinition: true) @@ -91,7 +91,7 @@ void testshortmul(void) { // CHECKU: [[META14]] = !DISubroutineType(types: [[META15:![0-9]+]]) // CHECKU: [[META15]] = !{null} // CHECKU: [[DBG16]] = !DILocation(line: 47, column: 8, scope: [[DBG13]]) -// CHECKU: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECKU: [[SHORT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} // CHECKU: [[META18]] = !{!"short", [[META19:![0-9]+]], i64 0} // CHECKU: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0} // CHECKU: [[META20]] = !{!"Simple C/C++ TBAA"} diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 15bdb7589bf45..b88aa3cc18207 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // REQUIRES: spirv-registered-target @@ -47,41 +47,43 @@ #define BOOL_TYPE int typedef unsigned long long uint64_t; -// CHECK-LABEL: @test___make_mantissa_base8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]] +// CHECK-LABEL: define dso_local i64 @test___make_mantissa_base8( +// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4:![0-9]+]] // CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// CHECK: while.body.i: 
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], [[IF_THEN_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] -// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], [[IF_THEN_I]] ], [ 0, [[ENTRY]] ] -// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_THEN_I]] ], [ [[P]], [[ENTRY]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// CHECK: [[WHILE_BODY_I]]: +// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], %[[IF_THEN_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ] +// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], %[[IF_THEN_I]] ], [ 0, %[[ENTRY]] ] +// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[IF_THEN_I]] ], [ [[P]], %[[ENTRY]] ] // CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], -8 // CHECK-NEXT: [[OR_COND_I:%.*]] = icmp eq i8 [[TMP2]], 48 -// CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_THEN_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]] -// CHECK: if.then.i: +// CHECK-NEXT: br i1 [[OR_COND_I]], label %[[IF_THEN_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]] +// CHECK: [[IF_THEN_I]]: // CHECK-NEXT: [[MUL_I:%.*]] = shl i64 [[__R_0_I3]], 3 // CHECK-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP1]] to i64 // CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 // CHECK-NEXT: [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]] // CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1 -// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP7:![0-9]+]] -// CHECK: _ZL21__make_mantissa_base8PKc.exit: -// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[WHILE_BODY_I]] ], [ [[SUB_I]], [[IF_THEN_I]] ] +// 
CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK: [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]: +// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[WHILE_BODY_I]] ], [ [[SUB_I]], %[[IF_THEN_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa_base8( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] -// AMDGCNSPIRV: while.cond.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5:![0-9]+]] +// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa_base8( +// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: br label %[[WHILE_COND_I:.*]] +// AMDGCNSPIRV: [[WHILE_COND_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P]], %[[ENTRY]] ], [ [[__TAGP_ADDR_1_I:%.*]], %[[WHILE_BODY_I:.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[__R_1_I:%.*]], %[[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA5:![0-9]+]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I]] -// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:.*]], label %[[WHILE_BODY_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I]]: // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], -8 
// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp eq i8 [[TMP1]], 48 // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl i64 [[__R_0_I]], 3 @@ -91,50 +93,52 @@ typedef unsigned long long uint64_t; // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]] -// AMDGCNSPIRV: _ZL21__make_mantissa_base8PKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[WHILE_COND_I]], label %[[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]] +// AMDGCNSPIRV: [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[WHILE_BODY_I]] ], [ [[__R_0_I]], %[[WHILE_COND_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] // extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { return __make_mantissa_base8(p); } -// CHECK-LABEL: @test___make_mantissa_base10( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]] +// CHECK-LABEL: define dso_local i64 @test___make_mantissa_base10( +// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// CHECK: while.body.i: -// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], [[IF_THEN_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] -// 
CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], [[IF_THEN_I]] ], [ 0, [[ENTRY]] ] -// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_THEN_I]] ], [ [[P]], [[ENTRY]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// CHECK: [[WHILE_BODY_I]]: +// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], %[[IF_THEN_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ] +// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], %[[IF_THEN_I]] ], [ 0, %[[ENTRY]] ] +// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[IF_THEN_I]] ], [ [[P]], %[[ENTRY]] ] // CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -48 // CHECK-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_THEN_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]] -// CHECK: if.then.i: +// CHECK-NEXT: br i1 [[OR_COND_I]], label %[[IF_THEN_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]] +// CHECK: [[IF_THEN_I]]: // CHECK-NEXT: [[MUL_I:%.*]] = mul i64 [[__R_0_I3]], 10 // CHECK-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP1]] to i64 // CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 // CHECK-NEXT: [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]] // CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1 -// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK: _ZL22__make_mantissa_base10PKc.exit: -// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[WHILE_BODY_I]] ], [ [[SUB_I]], [[IF_THEN_I]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], label %[[WHILE_BODY_I]], 
!llvm.loop [[LOOP10:![0-9]+]] +// CHECK: [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]: +// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[WHILE_BODY_I]] ], [ [[SUB_I]], %[[IF_THEN_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa_base10( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] -// AMDGCNSPIRV: while.cond.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa_base10( +// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: br label %[[WHILE_COND_I:.*]] +// AMDGCNSPIRV: [[WHILE_COND_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P]], %[[ENTRY]] ], [ [[__TAGP_ADDR_1_I:%.*]], %[[WHILE_BODY_I:.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[__R_1_I:%.*]], %[[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I]] -// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:.*]], label %[[WHILE_BODY_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I]]: // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10 // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = mul i64 
[[__R_0_I]], 10 @@ -144,220 +148,224 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP11:![0-9]+]] -// AMDGCNSPIRV: _ZL22__make_mantissa_base10PKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[WHILE_COND_I]], label %[[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +// AMDGCNSPIRV: [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[WHILE_BODY_I]] ], [ [[__R_0_I]], %[[WHILE_COND_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] // extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { return __make_mantissa_base10(p); } -// CHECK-LABEL: @test___make_mantissa_base16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]] +// CHECK-LABEL: define dso_local i64 @test___make_mantissa_base16( +// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// CHECK: while.body.i: -// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], [[IF_END31_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] -// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], 
[[IF_END31_I]] ], [ 0, [[ENTRY]] ] -// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_END31_I]] ], [ [[P]], [[ENTRY]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// CHECK: [[WHILE_BODY_I]]: +// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], %[[IF_END31_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ] +// CHECK-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], %[[IF_END31_I]] ], [ 0, %[[ENTRY]] ] +// CHECK-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[IF_END31_I]] ], [ [[P]], %[[ENTRY]] ] // CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -48 // CHECK-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_END31_I]], label [[IF_ELSE_I:%.*]] -// CHECK: if.else.i: +// CHECK-NEXT: br i1 [[OR_COND_I]], label %[[IF_END31_I]], label %[[IF_ELSE_I:.*]] +// CHECK: [[IF_ELSE_I]]: // CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], -97 // CHECK-NEXT: [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP3]], 6 -// CHECK-NEXT: br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]] -// CHECK: if.else17.i: +// CHECK-NEXT: br i1 [[OR_COND33_I]], label %[[IF_END31_I]], label %[[IF_ELSE17_I:.*]] +// CHECK: [[IF_ELSE17_I]]: // CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP1]], -65 // CHECK-NEXT: [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP4]], 6 -// CHECK-NEXT: br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]] -// CHECK: if.end31.i: -// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ] +// CHECK-NEXT: br i1 [[OR_COND34_I]], label %[[IF_END31_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]] +// CHECK: [[IF_END31_I]]: +// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I]] ], [ -87, %[[IF_ELSE_I]] ], [ -55, %[[IF_ELSE17_I]] ] // CHECK-NEXT: [[MUL24_I:%.*]] = shl i64 [[__R_0_I3]], 4 // CHECK-NEXT: [[CONV25_I:%.*]] = zext nneg i8 
[[TMP1]] to i64 // CHECK-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]] // CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1 -// CHECK-NEXT: [[TMP5]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP5]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP11:![0-9]+]] -// CHECK: _ZL22__make_mantissa_base16PKc.exit: -// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[IF_ELSE17_I]] ], [ [[ADD28_I]], [[IF_END31_I]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK: [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]: +// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[IF_ELSE17_I]] ], [ [[ADD28_I]], %[[IF_END31_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa_base16( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P:%.*]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa_base16( +// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], [[IF_END31_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: 
[[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], [[IF_END31_I]] ], [ 0, [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[IF_END31_I]] ], [ [[P]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I1]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], %[[IF_END31_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], %[[IF_END31_I]] ], [ 0, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I2:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[IF_END31_I]] ], [ [[P]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[IF_END31_I]], label [[IF_ELSE_I:%.*]] -// AMDGCNSPIRV: if.else.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label %[[IF_END31_I]], label %[[IF_ELSE_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE_I]]: // AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], -97 // AMDGCNSPIRV-NEXT: [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP3]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]] -// AMDGCNSPIRV: if.else17.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I]], label %[[IF_END31_I]], label %[[IF_ELSE17_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE17_I]]: // AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP1]], -65 // AMDGCNSPIRV-NEXT: [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP4]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]] -// AMDGCNSPIRV: if.end31.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I]], label %[[IF_END31_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]] +// AMDGCNSPIRV: [[IF_END31_I]]: +// 
AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I]] ], [ -87, %[[IF_ELSE_I]] ], [ -55, %[[IF_ELSE17_I]] ] // AMDGCNSPIRV-NEXT: [[MUL24_I:%.*]] = shl i64 [[__R_0_I3]], 4 // AMDGCNSPIRV-NEXT: [[CONV25_I:%.*]] = zext nneg i8 [[TMP1]] to i64 // AMDGCNSPIRV-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I2]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP5]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP5]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP12:![0-9]+]] -// AMDGCNSPIRV: _ZL22__make_mantissa_base16PKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[IF_ELSE17_I]] ], [ [[ADD28_I]], [[IF_END31_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label %[[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP12:![0-9]+]] +// AMDGCNSPIRV: [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[IF_ELSE17_I]] ], [ [[ADD28_I]], %[[IF_END31_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] // extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { return __make_mantissa_base16(p); } -// CHECK-LABEL: @test___make_mantissa( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]] +// CHECK-LABEL: define dso_local i64 @test___make_mantissa( +// CHECK-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa 
[[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// CHECK-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I_PREHEADER:%.*]] -// CHECK: while.cond.i14.i.preheader: -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[TBAA4]] +// CHECK-NEXT: br i1 [[CMP_I]], label %[[IF_THEN_I:.*]], label %[[WHILE_COND_I14_I_PREHEADER:.*]] +// CHECK: [[WHILE_COND_I14_I_PREHEADER]]: +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I17_I5:%.*]] = icmp eq i8 [[TMP1]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I17_I5]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I:%.*]] -// CHECK: if.then.i: +// CHECK-NEXT: br i1 [[CMP_NOT_I17_I5]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I:.*]] +// CHECK: [[IF_THEN_I]]: // CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1 -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_PREHEADER:%.*]] [ -// CHECK-NEXT: i8 120, label [[IF_THEN5_I:%.*]] -// CHECK-NEXT: i8 88, label [[IF_THEN5_I]] +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] +// CHECK-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_PREHEADER:.*]] [ +// CHECK-NEXT: i8 120, label %[[IF_THEN5_I:.*]] +// CHECK-NEXT: i8 88, label %[[IF_THEN5_I]] // CHECK-NEXT: ] -// CHECK: while.cond.i.i.preheader: -// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]] +// CHECK: [[WHILE_COND_I_I_PREHEADER]]: +// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I_I14]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I:%.*]] -// CHECK: if.then5.i: -// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, 
!tbaa [[TBAA4]] +// CHECK-NEXT: br i1 [[CMP_NOT_I_I14]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I:.*]] +// CHECK: [[IF_THEN5_I]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I30_I9:%.*]] = icmp eq i8 [[TMP4]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I30_I9]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I31_I:%.*]] -// CHECK: while.body.i31.i: -// CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I]] ] -// CHECK-NEXT: [[__R_0_I29_I11:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], [[IF_END31_I_I]] ], [ 0, [[IF_THEN5_I]] ] -// CHECK-NEXT: [[__TAGP_ADDR_0_I28_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I:%.*]], [[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], [[IF_THEN5_I]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I30_I9]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I31_I:.*]] +// CHECK: [[WHILE_BODY_I31_I]]: +// CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I]] ] +// CHECK-NEXT: [[__R_0_I29_I11:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], %[[IF_END31_I_I]] ], [ 0, %[[IF_THEN5_I]] ] +// CHECK-NEXT: [[__TAGP_ADDR_0_I28_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I:%.*]], %[[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], %[[IF_THEN5_I]] ] // CHECK-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48 // CHECK-NEXT: [[OR_COND_I32_I:%.*]] = icmp ult i8 [[TMP6]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I32_I]], label [[IF_END31_I_I]], label [[IF_ELSE_I_I:%.*]] -// CHECK: if.else.i.i: +// CHECK-NEXT: br i1 [[OR_COND_I32_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE_I_I:.*]] +// CHECK: [[IF_ELSE_I_I]]: // CHECK-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97 // CHECK-NEXT: [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP7]], 6 -// CHECK-NEXT: br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]] -// CHECK: if.else17.i.i: +// CHECK-NEXT: br i1 [[OR_COND33_I_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE17_I_I:.*]] 
+// CHECK: [[IF_ELSE17_I_I]]: // CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65 // CHECK-NEXT: [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP8]], 6 -// CHECK-NEXT: br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]] -// CHECK: if.end31.i.i: -// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ] +// CHECK-NEXT: br i1 [[OR_COND34_I_I]], label %[[IF_END31_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]] +// CHECK: [[IF_END31_I_I]]: +// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I]] ], [ -87, %[[IF_ELSE_I_I]] ], [ -55, %[[IF_ELSE17_I_I]] ] // CHECK-NEXT: [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I29_I11]], 4 // CHECK-NEXT: [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64 // CHECK-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]] // CHECK-NEXT: [[INCDEC_PTR_I34_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I10]], i64 1 -// CHECK-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I]], align 1, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I30_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I30_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I31_I]], !llvm.loop [[LOOP11]] -// CHECK: while.body.i.i: -// CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_PREHEADER]] ] -// CHECK-NEXT: [[__R_0_I_I16:%.*]] = phi i64 [ [[SUB_I_I:%.*]], [[IF_THEN_I_I]] ], [ 0, [[WHILE_COND_I_I_PREHEADER]] ] -// CHECK-NEXT: [[__TAGP_ADDR_0_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], [[IF_THEN_I_I]] ], [ [[INCDEC_PTR_I]], [[WHILE_COND_I_I_PREHEADER]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I30_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I31_I]], !llvm.loop [[LOOP11]] +// CHECK: [[WHILE_BODY_I_I]]: +// 
CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_PREHEADER]] ] +// CHECK-NEXT: [[__R_0_I_I16:%.*]] = phi i64 [ [[SUB_I_I:%.*]], %[[IF_THEN_I_I]] ], [ 0, %[[WHILE_COND_I_I_PREHEADER]] ] +// CHECK-NEXT: [[__TAGP_ADDR_0_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[IF_THEN_I_I]] ], [ [[INCDEC_PTR_I]], %[[WHILE_COND_I_I_PREHEADER]] ] // CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8 // CHECK-NEXT: [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP11]], 48 -// CHECK-NEXT: br i1 [[OR_COND_I_I]], label [[IF_THEN_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]] -// CHECK: if.then.i.i: +// CHECK-NEXT: br i1 [[OR_COND_I_I]], label %[[IF_THEN_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]] +// CHECK: [[IF_THEN_I_I]]: // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I16]], 3 // CHECK-NEXT: [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48 // CHECK-NEXT: [[SUB_I_I]] = add i64 [[ADD_I_I]], [[CONV5_I_I]] // CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I15]], i64 1 -// CHECK-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]], !llvm.loop [[LOOP7]] -// CHECK: while.body.i18.i: -// CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_PREHEADER]] ] -// CHECK-NEXT: [[__R_0_I16_I7:%.*]] = phi i64 [ [[SUB_I25_I:%.*]], [[IF_THEN_I21_I]] ], [ 0, [[WHILE_COND_I14_I_PREHEADER]] ] -// CHECK-NEXT: [[__TAGP_ADDR_0_I15_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I:%.*]], [[IF_THEN_I21_I]] ], [ [[P]], [[WHILE_COND_I14_I_PREHEADER]] ] +// CHECK-NEXT: br i1 [[CMP_NOT_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label 
%[[WHILE_BODY_I_I]], !llvm.loop [[LOOP7]] +// CHECK: [[WHILE_BODY_I18_I]]: +// CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_PREHEADER]] ] +// CHECK-NEXT: [[__R_0_I16_I7:%.*]] = phi i64 [ [[SUB_I25_I:%.*]], %[[IF_THEN_I21_I]] ], [ 0, %[[WHILE_COND_I14_I_PREHEADER]] ] +// CHECK-NEXT: [[__TAGP_ADDR_0_I15_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I:%.*]], %[[IF_THEN_I21_I]] ], [ [[P]], %[[WHILE_COND_I14_I_PREHEADER]] ] // CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48 // CHECK-NEXT: [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP14]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I19_I]], label [[IF_THEN_I21_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]] -// CHECK: if.then.i21.i: +// CHECK-NEXT: br i1 [[OR_COND_I19_I]], label %[[IF_THEN_I21_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]] +// CHECK: [[IF_THEN_I21_I]]: // CHECK-NEXT: [[MUL_I22_I:%.*]] = mul i64 [[__R_0_I16_I7]], 10 // CHECK-NEXT: [[CONV5_I23_I:%.*]] = zext nneg i8 [[TMP13]] to i64 // CHECK-NEXT: [[ADD_I24_I:%.*]] = add i64 [[MUL_I22_I]], -48 // CHECK-NEXT: [[SUB_I25_I]] = add i64 [[ADD_I24_I]], [[CONV5_I23_I]] // CHECK-NEXT: [[INCDEC_PTR_I26_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I6]], i64 1 -// CHECK-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I]], align 1, !tbaa [[TBAA4]] +// CHECK-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I]], align 1, !tbaa [[CHAR_TBAA4]] // CHECK-NEXT: [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]], !llvm.loop [[LOOP10]] -// CHECK: _ZL15__make_mantissaPKc.exit: -// CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I]] ], [ 0, [[WHILE_COND_I14_I_PREHEADER]] ], [ [[SUB_I_I]], [[IF_THEN_I_I]] ], [ 0, [[WHILE_BODY_I_I]] ], [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ 0, [[IF_ELSE17_I_I]] ], [ [[SUB_I25_I]], [[IF_THEN_I21_I]] ], [ 0, [[WHILE_BODY_I18_I]] ] +// CHECK-NEXT: 
br i1 [[CMP_NOT_I17_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I18_I]], !llvm.loop [[LOOP10]] +// CHECK: [[_ZL15__MAKE_MANTISSAPKC_EXIT]]: +// CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I]] ], [ 0, %[[WHILE_COND_I14_I_PREHEADER]] ], [ [[SUB_I_I]], %[[IF_THEN_I_I]] ], [ 0, %[[WHILE_BODY_I_I]] ], [ [[ADD28_I_I]], %[[IF_END31_I_I]] ], [ 0, %[[IF_ELSE17_I_I]] ], [ [[SUB_I25_I]], %[[IF_THEN_I21_I]] ], [ 0, %[[WHILE_BODY_I18_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P:%.*]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-LABEL: define spir_func i64 @test___make_mantissa( +// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]] -// AMDGCNSPIRV: if.then.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label %[[IF_THEN_I:.*]], label %[[WHILE_COND_I14_I:.*]] +// AMDGCNSPIRV: [[IF_THEN_I]]: // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[P]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i8 120, label [[IF_THEN5_I:%.*]] -// AMDGCNSPIRV-NEXT: i8 88, label [[IF_THEN5_I]] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I:.*]] [ +// AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I:.*]] +// 
AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I]] // AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then5.i: -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV: [[IF_THEN5_I]]: +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I5:%.*]] = icmp eq i8 [[TMP2]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I5]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I:%.*]] -// AMDGCNSPIRV: while.body.i32.i: -// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I30_I7:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], [[IF_END31_I_I]] ], [ 0, [[IF_THEN5_I]] ] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I:%.*]], [[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], [[IF_THEN5_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I5]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I32_I]]: +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], %[[IF_END31_I_I:.*]] ], [ [[TMP2]], %[[IF_THEN5_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I30_I7:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], %[[IF_END31_I_I]] ], [ 0, %[[IF_THEN5_I]] ] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I:%.*]], %[[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], %[[IF_THEN5_I]] ] // AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I33_I:%.*]] = icmp ult i8 [[TMP4]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I]], label [[IF_END31_I_I]], label [[IF_ELSE_I_I:%.*]] -// AMDGCNSPIRV: if.else.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE_I_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], -97 // AMDGCNSPIRV-NEXT: 
[[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]] -// AMDGCNSPIRV: if.else17.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I]], label %[[IF_END31_I_I]], label %[[IF_ELSE17_I_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE17_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = add i8 [[TMP3]], -65 // AMDGCNSPIRV-NEXT: [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP6]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]] -// AMDGCNSPIRV: if.end31.i.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I]], label %[[IF_END31_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]] +// AMDGCNSPIRV: [[IF_END31_I_I]]: +// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I32_I]] ], [ -87, %[[IF_ELSE_I_I]] ], [ -55, %[[IF_ELSE17_I_I]] ] // AMDGCNSPIRV-NEXT: [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I30_I7]], 4 // AMDGCNSPIRV-NEXT: [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64 // AMDGCNSPIRV-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I6]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I:%.*]] = icmp eq i8 [[TMP7]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I32_I]], !llvm.loop [[LOOP12]] -// AMDGCNSPIRV: while.cond.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], [[WHILE_BODY_I_I:%.*]] ], [ 
[[INCDEC_PTR_I]], [[IF_THEN_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], [[WHILE_BODY_I_I]] ], [ 0, [[IF_THEN_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I32_I]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV: [[WHILE_COND_I_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], %[[WHILE_BODY_I_I:.*]] ], [ [[INCDEC_PTR_I]], %[[IF_THEN_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], %[[WHILE_BODY_I_I]] ], [ 0, %[[IF_THEN_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]] -// AMDGCNSPIRV: while.body.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], -8 // AMDGCNSPIRV-NEXT: [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP9]], 48 // AMDGCNSPIRV-NEXT: [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I]], 3 @@ -367,14 +375,14 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], i64 [[__TAGP_ADDR_1_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I_I]] = select i1 [[OR_COND_I_I]], i64 [[SUB_I_I]], i64 [[__R_0_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label [[WHILE_COND_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP8]] -// AMDGCNSPIRV: while.cond.i14.i: -// AMDGCNSPIRV-NEXT: 
[[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], [[WHILE_BODY_I18_I:%.*]] ], [ [[P]], [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], [[WHILE_BODY_I18_I]] ], [ 0, [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label %[[WHILE_COND_I_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV: [[WHILE_COND_I14_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], %[[WHILE_BODY_I18_I:.*]] ], [ [[P]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], %[[WHILE_BODY_I18_I]] ], [ 0, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP10]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]] -// AMDGCNSPIRV: while.body.i18.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], label %[[WHILE_BODY_I18_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I18_I]]: // AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = add i8 [[TMP10]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP11]], 10 // AMDGCNSPIRV-NEXT: [[MUL_I20_I:%.*]] = mul i64 [[__R_0_I16_I]], 10 @@ -384,225 +392,261 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], i64 [[__TAGP_ADDR_1_I25_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I26_I]] = select i1 [[OR_COND_I19_I]], i64 [[SUB_I23_I]], i64 [[__R_0_I16_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], 
label [[WHILE_COND_I14_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]] -// AMDGCNSPIRV: _ZL15__make_mantissaPKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I]] ], [ 0, [[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ 0, [[IF_ELSE17_I_I]] ], [ 0, [[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], label %[[WHILE_COND_I14_I]], label %[[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV: [[_ZL15__MAKE_MANTISSAPKC_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I]] ], [ 0, %[[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], %[[WHILE_COND_I_I]] ], [ [[ADD28_I_I]], %[[IF_END31_I_I]] ], [ 0, %[[IF_ELSE17_I_I]] ], [ 0, %[[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], %[[WHILE_COND_I14_I]] ] // AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_0_I]] // extern "C" __device__ uint64_t test___make_mantissa(const char *p) { return __make_mantissa(p); } -// CHECK-LABEL: @test_abs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) +// CHECK-LABEL: define dso_local noundef range(i32 0, -2147483648) i32 @test_abs( +// CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) i32 @llvm.abs.i32(i32 [[X]], i1 true) // CHECK-NEXT: ret i32 [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_abs( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) addrspace(4) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, -2147483648) i32 @test_abs( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail 
call noundef range(i32 0, -2147483648) addrspace(4) i32 @llvm.abs.i32(i32 [[X]], i1 true) // AMDGCNSPIRV-NEXT: ret i32 [[TMP0]] // extern "C" __device__ int test_abs(int x) { return abs(x); } -// CHECK-LABEL: @test_labs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// CHECK-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @test_labs( +// CHECK-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_labs( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i64 0, -9223372036854775808) i64 @test_labs( +// AMDGCNSPIRV-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X]], i1 true) // AMDGCNSPIRV-NEXT: ret i64 [[TMP0]] // extern "C" __device__ long test_labs(long x) { return labs(x); } -// CHECK-LABEL: @test_llabs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// CHECK-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @test_llabs( +// CHECK-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_llabs( -// AMDGCNSPIRV-NEXT: entry: 
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i64 0, -9223372036854775808) i64 @test_llabs( +// AMDGCNSPIRV-SAME: i64 noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X]], i1 true) // AMDGCNSPIRV-NEXT: ret i64 [[TMP0]] // extern "C" __device__ long long test_llabs(long x) { return llabs(x); } -// DEFAULT-LABEL: @test_acosf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]] +// DEFAULT-LABEL: define dso_local noundef float @test_acosf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_acosf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14:[0-9]+]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_acosf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14:[0-9]+]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_acosf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef 
[[X:%.*]]) #[[ATTR14:[0-9]+]] +// APPROX-LABEL: define dso_local noundef float @test_acosf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_acosf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]] +// NCRDIV-LABEL: define dso_local noundef float @test_acosf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_acosf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acos_f32(float noundef [[X]]) #[[ATTR12:[0-9]+]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_acosf(float x) { return acosf(x); } -// DEFAULT-LABEL: @test_acos( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_acos( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_acos( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_acos( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_acos( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_acos( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_acos( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_acos( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acos( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// 
AMDGCNSPIRV-LABEL: define spir_func noundef double @test_acos( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acos_f64(double noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_acos(double x) { return acos(x); } -// DEFAULT-LABEL: @test_acoshf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]] +// DEFAULT-LABEL: define dso_local noundef float @test_acoshf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_acoshf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15:[0-9]+]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_acoshf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15:[0-9]+]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_acoshf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]] +// APPROX-LABEL: define dso_local noundef float @test_acoshf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr 
#[[ATTR5:[0-9]+]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_acoshf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]] +// NCRDIV-LABEL: define dso_local noundef float @test_acoshf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR15:[0-9]+]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acoshf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_acoshf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acosh_f32(float noundef [[X]]) #[[ATTR13:[0-9]+]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_acoshf(float x) { return acoshf(x); } -// DEFAULT-LABEL: @test_acosh( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_acosh( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_acosh( -// 
FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_acosh( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_acosh( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_acosh( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_acosh( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_acosh( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acosh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_acosh( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) 
local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acosh_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_acosh(double x) { return acosh(x); } -// DEFAULT-LABEL: @test_asinf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_asinf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_asinf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_asinf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_asinf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_asinf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret 
float [[CALL_I]] // -// NCRDIV-LABEL: @test_asinf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_asinf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asinf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_asinf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asin_f32(float noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_asinf(float x) { return asinf(x); } -// DEFAULT-LABEL: @test_asin( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_asin( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_asin( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan 
inf) double @test_asin( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_asin( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_asin( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_asin( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_asin( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asin( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_asin( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asin_f64(double noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" 
__device__ double test_asin(double x) { @@ -610,1551 +654,1816 @@ extern "C" __device__ double test_asin(double x) { return asin(x); } -// DEFAULT-LABEL: @test_asinhf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_asinhf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_asinhf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_asinhf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_asinhf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_asinhf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_asinhf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]] 
+// NCRDIV-LABEL: define dso_local noundef float @test_asinhf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asinhf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_asinhf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asinh_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_asinhf(float x) { return asinhf(x); } -// DEFAULT-LABEL: @test_asinh( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_asinh( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_asinh( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_asinh( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: 
[[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_asinh( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_asinh( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_asinh( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_asinh( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asinh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_asinh( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asinh_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_asinh(double x) { return asinh(x); } -// DEFAULT-LABEL: @test_atan2f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_atan2f( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_atan2f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atan2f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_atan2f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_atan2f( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_atan2f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float 
noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_atan2f( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atan2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atan2f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan2_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_atan2f(float x, float y) { return atan2f(x, y); } -// DEFAULT-LABEL: @test_atan2( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_atan2( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_atan2( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) 
[[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atan2( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_atan2( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_atan2( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_atan2( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_atan2( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atan2( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] 
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atan2( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan2_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_atan2(double x, double y) { return atan2(x, y); } -// DEFAULT-LABEL: @test_atanf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_atanf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_atanf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atanf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_atanf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_atanf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr 
#[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_atanf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_atanf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atanf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan_f32(float noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_atanf(float x) { return atanf(x); } -// DEFAULT-LABEL: @test_atan( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_atan( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_atan( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan 
ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atan( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_atan( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_atan( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_atan( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_atan( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atan( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: 
[[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan_f64(double noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_atan(double x) { return atan(x); } -// DEFAULT-LABEL: @test_atanhf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_atanhf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_atanhf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_atanhf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_atanhf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_atanhf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_atanhf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_atanhf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atanhf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_atanhf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atanh_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_atanhf(float x) { return atanhf(x); } -// DEFAULT-LABEL: @test_atanh( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_atanh( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_atanh( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_atanh( +// FINITEONLY-SAME: double noundef nofpclass(nan 
inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_atanh( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_atanh( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_atanh( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_atanh( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atanh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_atanh( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atanh_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_atanh(double x) { return atanh(x); } 
-// DEFAULT-LABEL: @test_cbrtf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_cbrtf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_cbrtf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cbrtf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_cbrtf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_cbrtf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_cbrtf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_cbrtf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// 
NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cbrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cbrtf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cbrt_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_cbrtf(float x) { return cbrtf(x); } -// DEFAULT-LABEL: @test_cbrt( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_cbrt( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_cbrt( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cbrt( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // 
FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_cbrt( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_cbrt( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_cbrt( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_cbrt( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cbrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cbrt( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cbrt_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_cbrt(double x) { return cbrt(x); } -// DEFAULT-LABEL: @test_ceilf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_ceilf( +// DEFAULT-SAME: float noundef [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_ceilf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ceil.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_ceilf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ceil.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_ceilf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_ceilf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_ceilf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_ceilf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_ceilf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ceil.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_ceilf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr 
addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ceil.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_ceilf(float x) { return ceilf(x); } -// DEFAULT-LABEL: @test_ceil( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_ceil( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_ceil( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ceil.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_ceil( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ceil.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_ceil( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_ceil( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_ceil( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double 
@test_ceil( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_ceil( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ceil.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_ceil( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ceil.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_ceil(double x) { return ceil(x); } -// DEFAULT-LABEL: @test_copysignf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_copysignf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X]], float [[Y]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_copysignf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.copysign.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_copysignf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.copysign.f32(float 
nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_copysignf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_copysignf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X]], float [[Y]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_copysignf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_copysignf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X]], float [[Y]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_copysignf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_copysignf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.copysign.f32(float [[X]], float [[Y]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_copysignf(float x, float y) { return copysignf(x, y); } -// DEFAULT-LABEL: @test_copysign( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], 
double [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_copysign( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X]], double [[Y]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_copysign( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.copysign.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_copysign( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.copysign.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_copysign( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_copysign( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X]], double [[Y]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_copysign( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_copysign( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// 
NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X]], double [[Y]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_copysign( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_copysign( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.copysign.f64(double [[X]], double [[Y]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_copysign(double x, double y) { return copysign(x, y); } -// DEFAULT-LABEL: @test_cosf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]] +// DEFAULT-LABEL: define dso_local noundef float @test_cosf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_cosf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16:[0-9]+]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cosf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X]]) 
#[[ATTR16:[0-9]+]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_cosf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]] +// APPROX-LABEL: define dso_local noundef float @test_cosf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I1]] // -// NCRDIV-LABEL: @test_cosf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]] +// NCRDIV-LABEL: define dso_local noundef float @test_cosf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR16:[0-9]+]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cosf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cos_f32(float noundef [[X]]) #[[ATTR14:[0-9]+]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_cosf(float x) { return cosf(x); } -// DEFAULT-LABEL: @test_cos( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: 
define dso_local noundef double @test_cos( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_cos( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cos( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_cos( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_cos( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_cos( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_cos( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// 
AMDGCNSPIRV-LABEL: @test_cos( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cos( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cos_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_cos(double x) { return cos(x); } -// DEFAULT-LABEL: @test_coshf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_coshf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_coshf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_coshf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_coshf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef 
[[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_coshf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_coshf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_coshf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_coshf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_coshf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cosh_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_coshf(float x) { return coshf(x); } -// DEFAULT-LABEL: @test_cosh( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_cosh( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]] // 
DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_cosh( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cosh( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_cosh( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_cosh( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_cosh( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_cosh( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cosh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double 
@test_cosh( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cosh_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_cosh(double x) { return cosh(x); } -// DEFAULT-LABEL: @test_cospif( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_cospif( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_cospif( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cospif( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_cospif( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_cospif( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_cospif( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_cospif( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cospif( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cospif( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cospi_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_cospif(float x) { return cospif(x); } -// DEFAULT-LABEL: @test_cospi( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_cospi( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_cospi( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan 
inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cospi( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_cospi( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_cospi( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_cospi( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_cospi( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cospi( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cospi( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double 
@__ocml_cospi_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_cospi(double x) { return cospi(x); } -// DEFAULT-LABEL: @test_cyl_bessel_i0f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_cyl_bessel_i0f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_cyl_bessel_i0f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cyl_bessel_i0f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_cyl_bessel_i0f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_cyl_bessel_i0f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_cyl_bessel_i0f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_cyl_bessel_i0f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cyl_bessel_i0f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i0_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_cyl_bessel_i0f(float x) { return cyl_bessel_i0f(x); } -// DEFAULT-LABEL: @test_cyl_bessel_i0( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_cyl_bessel_i0( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_cyl_bessel_i0( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cyl_bessel_i0( +// FINITEONLY-SAME: double 
noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_cyl_bessel_i0( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_cyl_bessel_i0( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_cyl_bessel_i0( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_cyl_bessel_i0( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cyl_bessel_i0( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i0_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ 
double test_cyl_bessel_i0(double x) { return cyl_bessel_i0(x); } -// DEFAULT-LABEL: @test_cyl_bessel_i1f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_cyl_bessel_i1f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_cyl_bessel_i1f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_cyl_bessel_i1f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_cyl_bessel_i1f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_cyl_bessel_i1f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_cyl_bessel_i1f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local 
noundef float @test_cyl_bessel_i1f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_cyl_bessel_i1f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i1_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_cyl_bessel_i1f(float x) { return cyl_bessel_i1f(x); } -// DEFAULT-LABEL: @test_cyl_bessel_i1( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_cyl_bessel_i1( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_cyl_bessel_i1( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_cyl_bessel_i1( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// 
FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_cyl_bessel_i1( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_cyl_bessel_i1( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_cyl_bessel_i1( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_cyl_bessel_i1( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_cyl_bessel_i1( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i1_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_cyl_bessel_i1(double x) { return cyl_bessel_i1(x); } -// DEFAULT-LABEL: @test_erfcf( -// 
DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_erfcf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_erfcf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_erfcf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_erfcf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_erfcf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_erfcf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_erfcf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfcf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_erfcf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfc_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_erfcf(float x) { return erfcf(x); } -// DEFAULT-LABEL: @test_erfc( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_erfc( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_erfc( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_erfc( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// 
APPROX-LABEL: @test_erfc( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_erfc( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_erfc( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_erfc( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfc( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_erfc( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfc_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_erfc(double x) { return erfc(x); } -// DEFAULT-LABEL: @test_erfinvf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_erfinvf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] 
{ +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_erfinvf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_erfinvf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_erfinvf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_erfinvf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_erfinvf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_erfinvf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfinvf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail 
call contract spir_func noundef addrspace(4) float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_erfinvf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfinv_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_erfinvf(float x) { return erfinvf(x); } -// DEFAULT-LABEL: @test_erfinv( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_erfinv( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_erfinv( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_erfinv( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_erfinv( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double 
@test_erfinv( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_erfinv( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_erfinv( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfinv( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_erfinv( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfinv_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_erfinv(double x) { return erfinv(x); } -// DEFAULT-LABEL: @test_exp10f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_exp10f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_exp10f( -// FINITEONLY-NEXT: 
entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp10.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_exp10f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp10.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_exp10f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_exp10f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_exp10f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_exp10f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp10.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_exp10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp10.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_exp10f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp10.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float 
test_exp10f(float x) { return exp10f(x); } -// DEFAULT-LABEL: @test_exp10( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_exp10( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_exp10( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp10( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_exp10( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_exp10( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_exp10( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_exp10( +// 
NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_exp10( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp10( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp10_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_exp10(double x) { return exp10(x); } -// DEFAULT-LABEL: @test_exp2f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_exp2f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_exp2f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp2.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_exp2f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp2.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: 
@test_exp2f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_exp2f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_exp2f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_exp2f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_exp2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp2.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_exp2f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp2.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_exp2f(float x) { return exp2f(x); } -// DEFAULT-LABEL: @test_exp2( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_exp2( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// 
FINITEONLY-LABEL: @test_exp2( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp2( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_exp2( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_exp2( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_exp2( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_exp2( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_exp2( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp2( +// AMDGCNSPIRV-SAME: double noundef 
[[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp2_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_exp2(double x) { return exp2(x); } -// DEFAULT-LABEL: @test_expf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_expf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_expf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_expf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.exp.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_expf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_expf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_expf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]]) +// NCRDIV-LABEL: 
define dso_local noundef float @test_expf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_expf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_expf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_expf(float x) { return expf(x); } -// DEFAULT-LABEL: @test_exp( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_exp( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_exp( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_exp( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp_f64(double noundef nofpclass(nan inf) [[X]]) 
#[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_exp( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_exp( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_exp( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_exp( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_exp( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_exp( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_exp(double x) { return exp(x); } -// DEFAULT-LABEL: @test_expm1f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_expm1f( +// DEFAULT-SAME: float 
noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_expm1f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_expm1f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_expm1f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_expm1f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_expm1f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_expm1f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_expm1f( -// AMDGCNSPIRV-NEXT: entry: -// 
AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_expm1f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_expm1_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_expm1f(float x) { return expm1f(x); } -// DEFAULT-LABEL: @test_expm1( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_expm1( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_expm1( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_expm1( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_expm1( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define 
dso_local noundef double @test_expm1( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_expm1( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_expm1( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_expm1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_expm1( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_expm1_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_expm1(double x) { return expm1(x); } -// DEFAULT-LABEL: @test_fabsf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_fabsf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_fabsf( -// 
FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fabs.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fabsf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fabs.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_fabsf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_fabsf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_fabsf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_fabsf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fabsf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fabsf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fabs.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float 
test_fabsf(float x) { return fabsf(x); } -// DEFAULT-LABEL: @test_fabs( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_fabs( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_fabs( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fabs.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fabs( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fabs.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_fabs( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_fabs( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_fabs( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_fabs( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // 
-// AMDGCNSPIRV-LABEL: @test_fabs( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fabs( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fabs.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_fabs(double x) { return fabs(x); } -// DEFAULT-LABEL: @test_fdimf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_fdimf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_fdimf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fdimf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: 
@test_fdimf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_fdimf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_fdimf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_fdimf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fdimf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fdimf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fdim_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_fdimf(float x, float y) { return fdimf(x, y); } -// DEFAULT-LABEL: @test_fdim( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_fdim( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_fdim( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fdim( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_fdim( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_fdim( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_fdim( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef 
[[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_fdim( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fdim( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fdim( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fdim_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_fdim(double x, double y) { return fdim(x, y); } -// DEFAULT-LABEL: @test_fdividef( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef float @test_fdividef( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]] // DEFAULT-NEXT: ret float [[DIV_I]] // -// FINITEONLY-LABEL: @test_fdividef( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_fdividef( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { 
+// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X]], [[Y]] // FINITEONLY-NEXT: ret float [[DIV_I]] // -// APPROX-LABEL: @test_fdividef( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef float @test_fdividef( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]] // APPROX-NEXT: ret float [[DIV_I]] // -// NCRDIV-LABEL: @test_fdividef( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]], !fpmath [[META12:![0-9]+]] +// NCRDIV-LABEL: define dso_local noundef float @test_fdividef( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META12:![0-9]+]] // NCRDIV-NEXT: ret float [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test_fdividef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fdividef( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret float [[DIV_I]] // extern "C" __device__ float test_fdividef(float x, float y) { return fdividef(x, y); } -// DEFAULT-LABEL: @test_floorf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_floorf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: 
[[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_floorf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.floor.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_floorf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.floor.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_floorf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_floorf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_floorf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_floorf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_floorf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.floor.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_floorf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.floor.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_floorf(float x) { return floorf(x); } -// DEFAULT-LABEL: @test_floor( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_floor( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_floor( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.floor.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_floor( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.floor.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_floor( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_floor( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_floor( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_floor( +// NCRDIV-SAME: double noundef [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_floor( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.floor.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_floor( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.floor.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_floor(double x) { return floor(x); } -// DEFAULT-LABEL: @test_fmaf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_fmaf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_fmaf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]], float nofpclass(nan inf) [[Z:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fmaf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf 
contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]], float nofpclass(nan inf) [[Z]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_fmaf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_fmaf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_fmaf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_fmaf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmaf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fmaf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_fmaf(float x, float y, float z) { return 
fmaf(x, y, z); } -// DEFAULT-LABEL: @test_fma( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_fma( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_fma( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]], double nofpclass(nan inf) [[Z:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fma( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]], double nofpclass(nan inf) [[Z]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_fma( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_fma( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_fma( 
-// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_fma( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fma( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fma( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_fma(double x, double y, double z) { return fma(x, y, z); } -// DEFAULT-LABEL: @test_fma_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_fma_rn( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_fma_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef 
double @llvm.fma.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]], double nofpclass(nan inf) [[Z:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fma_rn( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]], double nofpclass(nan inf) [[Z]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_fma_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_fma_rn( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_fma_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_fma_rn( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fma_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double 
[[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fma_rn( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_fma_rn(double x, double y, double z) { return __fma_rn(x, y, z); } -// DEFAULT-LABEL: @test_fmaxf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_fmaxf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_fmaxf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fmaxf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_fmaxf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// APPROX-LABEL: define dso_local 
noundef float @test_fmaxf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_fmaxf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_fmaxf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmaxf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fmaxf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X]], float [[Y]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_fmaxf(float x, float y) { return fmaxf(x, y); } -// DEFAULT-LABEL: @test_fmax( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_fmax( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]]) // DEFAULT-NEXT: ret double 
[[TMP0]] // -// FINITEONLY-LABEL: @test_fmax( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fmax( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_fmax( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_fmax( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_fmax( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_fmax( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmax( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef 
double @test_fmax( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X]], double [[Y]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_fmax(double x, double y) { return fmax(x, y); } -// DEFAULT-LABEL: @test_fminf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_fminf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_fminf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fminf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_fminf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_fminf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: 
[[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_fminf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_fminf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fminf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fminf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X]], float [[Y]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_fminf(float x, float y) { return fminf(x, y); } -// DEFAULT-LABEL: @test_fmin( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_fmin( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_fmin( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef 
double @llvm.minnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fmin( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.minnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_fmin( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_fmin( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_fmin( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_fmin( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmin( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fmin( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// 
AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X]], double [[Y]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_fmin(double x, double y) { return fmin(x, y); } -// DEFAULT-LABEL: @test_fmodf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_fmodf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_fmodf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_fmodf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_fmodf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_fmodf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_fmodf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_fmodf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fmodf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_fmodf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fmod_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_fmodf(float x, float y) { return fmodf(x, y); } -// DEFAULT-LABEL: @test_fmod( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_fmod( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] 
= tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_fmod( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_fmod( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_fmod( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_fmod( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_fmod( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_fmod( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fmod( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_fmod( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fmod_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_fmod(double x, double y) { return fmod(x, y); } -// DEFAULT-LABEL: @test_frexpf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_frexpf( +// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12:![0-9]+]] +// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] // DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // DEFAULT-NEXT: ret float [[TMP2]] // -// FINITEONLY-LABEL: @test_frexpf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_frexpf( +// 
FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12:![0-9]+]] +// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] // FINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // FINITEONLY-NEXT: ret float [[TMP2]] // -// APPROX-LABEL: @test_frexpf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_frexpf( +// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12:![0-9]+]] +// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] // APPROX-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // APPROX-NEXT: ret float [[TMP2]] // -// NCRDIV-LABEL: @test_frexpf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_frexpf( +// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float 
[[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA13:![0-9]+]] +// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA13:![0-9]+]] // NCRDIV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // NCRDIV-NEXT: ret float [[TMP2]] // -// AMDGCNSPIRV-LABEL: @test_frexpf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_frexpf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR7:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[X]]) // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13:![0-9]+]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA13:![0-9]+]] // AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // AMDGCNSPIRV-NEXT: ret float [[TMP2]] // @@ -2162,43 +2471,48 @@ extern "C" __device__ float test_frexpf(float x, int* y) { return frexpf(x, y); } -// DEFAULT-LABEL: @test_frexp( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_frexp( +// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 
-// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12]] +// DEFAULT-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] // DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // DEFAULT-NEXT: ret double [[TMP2]] // -// FINITEONLY-LABEL: @test_frexp( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_frexp( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12]] +// FINITEONLY-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] // FINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // FINITEONLY-NEXT: ret double [[TMP2]] // -// APPROX-LABEL: @test_frexp( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_frexp( +// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA12]] +// APPROX-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA12]] // APPROX-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // 
APPROX-NEXT: ret double [[TMP2]] // -// NCRDIV-LABEL: @test_frexp( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_frexp( +// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR7]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA13]] +// NCRDIV-NEXT: store i32 [[TMP1]], ptr [[Y]], align 4, !tbaa [[INT_TBAA13]] // NCRDIV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // NCRDIV-NEXT: ret double [[TMP2]] // -// AMDGCNSPIRV-LABEL: @test_frexp( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_frexp( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR7]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[X]]) // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y]], align 4, !tbaa [[INT_TBAA13]] // AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // AMDGCNSPIRV-NEXT: ret double [[TMP2]] // @@ -2206,150 +2520,175 @@ extern "C" __device__ double test_frexp(double x, int* y) { return frexp(x, y); } -// DEFAULT-LABEL: @test_hypotf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_hypotf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_hypotf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_hypotf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_hypotf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_hypotf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_hypotf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef 
float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_hypotf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_hypotf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_hypotf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_hypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_hypotf(float x, float y) { return hypotf(x, y); } -// DEFAULT-LABEL: @test_hypot( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_hypot( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_hypot( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double 
noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_hypot( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_hypot( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_hypot( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_hypot( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_hypot( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_hypot( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_hypot_f64(double noundef [[X:%.*]], double 
noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_hypot( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_hypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_hypot(double x, double y) { return hypot(x, y); } -// DEFAULT-LABEL: @test_ilogbf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef i32 @test_ilogbf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret i32 [[CALL_I]] // -// FINITEONLY-LABEL: @test_ilogbf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef i32 @test_ilogbf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret i32 [[CALL_I]] // -// APPROX-LABEL: @test_ilogbf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef i32 @test_ilogbf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 
@__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret i32 [[CALL_I]] // -// NCRDIV-LABEL: @test_ilogbf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef i32 @test_ilogbf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret i32 [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_ilogbf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_ilogbf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f32(float noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]] // extern "C" __device__ int test_ilogbf(float x) { return ilogbf(x); } -// DEFAULT-LABEL: @test_ilogb( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef i32 @test_ilogb( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret i32 [[CALL_I]] // -// FINITEONLY-LABEL: @test_ilogb( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef i32 @test_ilogb( +// FINITEONLY-SAME: double noundef 
nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret i32 [[CALL_I]] // -// APPROX-LABEL: @test_ilogb( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef i32 @test_ilogb( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret i32 [[CALL_I]] // -// NCRDIV-LABEL: @test_ilogb( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef i32 @test_ilogb( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret i32 [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_ilogb( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_ilogb( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f64(double noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]] // extern "C" __device__ int test_ilogb(double x) { return ilogb(x); } -// DEFAULT-LABEL: @test___finitef( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float 
@llvm.fabs.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000 // DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // -// FINITEONLY-LABEL: @test___finitef( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 1 // -// APPROX-LABEL: @test___finitef( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: @test___finitef( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___finitef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = 
tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finitef( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]]) // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000 // AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // AMDGCNSPIRV-NEXT: ret i32 [[CONV]] @@ -2358,34 +2697,39 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { return __finitef(x); } -// DEFAULT-LABEL: @test___finite( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000 // DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // -// FINITEONLY-LABEL: @test___finite( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 1 // -// APPROX-LABEL: @test___finite( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 
0x7FF0000000000000 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: @test___finite( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___finite( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finite( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]]) // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000 // AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // AMDGCNSPIRV-NEXT: ret i32 [[CONV]] @@ -2394,34 +2738,39 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { return __finite(x); } -// DEFAULT-LABEL: @test___isinff( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000 // DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // -// 
FINITEONLY-LABEL: @test___isinff( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: @test___isinff( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: @test___isinff( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isinff( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinff( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]]) // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000 // AMDGCNSPIRV-NEXT: [[CONV:%.*]] 
= zext i1 [[TMP1]] to i32 // AMDGCNSPIRV-NEXT: ret i32 [[CONV]] @@ -2430,34 +2779,39 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { return __isinff(x); } -// DEFAULT-LABEL: @test___isinf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) // DEFAULT-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000 // DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // -// FINITEONLY-LABEL: @test___isinf( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: @test___isinf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) // APPROX-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: @test___isinf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double 
[[X]]) // NCRDIV-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isinf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinf( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]]) // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000 // AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // AMDGCNSPIRV-NEXT: ret i32 [[CONV]] @@ -2466,31 +2820,36 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) { return __isinf(x); } -// DEFAULT-LABEL: @test___isnanf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 +// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // -// FINITEONLY-LABEL: @test___isnanf( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: @test___isnanf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 +// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: 
[[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: @test___isnanf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 +// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isnanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 +// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnanf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // AMDGCNSPIRV-NEXT: ret i32 [[CONV]] // @@ -2498,31 +2857,36 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) { return __isnanf(x); } -// DEFAULT-LABEL: @test___isnan( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00 +// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // -// FINITEONLY-LABEL: @test___isnan( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: 
[[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: @test___isnan( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00 +// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: @test___isnan( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00 +// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isnan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00 +// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnan( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // AMDGCNSPIRV-NEXT: ret i32 [[CONV]] // @@ -2530,143 +2894,164 @@ extern "C" __device__ BOOL_TYPE test___isnan(double x) { return __isnan(x); } -// DEFAULT-LABEL: @test_j0f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_j0f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract 
noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_j0f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_j0f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_j0f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_j0f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_j0f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_j0f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j0f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func 
noundef float @test_j0f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_j0f(float x) { return j0f(x); } -// DEFAULT-LABEL: @test_j0( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_j0( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_j0( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_j0( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_j0( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_j0( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_j0( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_j0( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j0( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_j0( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_j0(double x) { return j0(x); } -// DEFAULT-LABEL: @test_j1f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_j1f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_j1f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// 
FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_j1f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_j1f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_j1f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_j1f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_j1f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j1f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_j1f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // 
extern "C" __device__ float test_j1f(float x) { return j1f(x); } -// DEFAULT-LABEL: @test_j1( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_j1( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_j1( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_j1( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_j1( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_j1( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_j1( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_j1( +// NCRDIV-SAME: double noundef 
[[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_j1( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_j1(double x) { return j1(x); } -// DEFAULT-LABEL: @test_jnf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// DEFAULT-LABEL: define dso_local float @test_jnf( +// DEFAULT-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] -// DEFAULT: if.then.i: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]] -// DEFAULT: if.then2.i: +// DEFAULT: [[IF_THEN_I]]: +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] +// DEFAULT: [[IF_THEN2_I]]: // DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float 
@__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL3JNFIF_EXIT]] -// DEFAULT: if.end4.i: +// DEFAULT-NEXT: br label %[[_ZL3JNFIF_EXIT]] +// DEFAULT: [[IF_END4_I]]: // DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] // DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]] -// DEFAULT: for.body.i: -// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] +// DEFAULT: [[FOR_BODY_I]]: +// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] @@ -2674,32 +3059,33 @@ extern "C" __device__ double test_j1(double x) { // DEFAULT-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] -// 
DEFAULT: _ZL3jnfif.exit: -// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] +// DEFAULT: [[_ZL3JNFIF_EXIT]]: +// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // DEFAULT-NEXT: ret float [[RETVAL_0_I]] // -// FINITEONLY-LABEL: @test_jnf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_jnf( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] -// FINITEONLY: if.then.i: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]] -// FINITEONLY-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]] -// FINITEONLY: if.then2.i: +// FINITEONLY: [[IF_THEN_I]]: +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] +// FINITEONLY: [[IF_THEN2_I]]: // FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// 
FINITEONLY-NEXT: br label [[_ZL3JNFIF_EXIT]] -// FINITEONLY: if.end4.i: +// FINITEONLY-NEXT: br label %[[_ZL3JNFIF_EXIT]] +// FINITEONLY: [[IF_END4_I]]: // FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]] -// FINITEONLY: for.body.i: -// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] +// FINITEONLY: [[FOR_BODY_I]]: +// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]] @@ -2707,32 +3093,33 @@ extern "C" __device__ double test_j1(double x) { // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]] // FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], 
[[X]] -// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] -// FINITEONLY: _ZL3jnfif.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] +// FINITEONLY: [[_ZL3JNFIF_EXIT]]: +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret float [[RETVAL_0_I]] // -// APPROX-LABEL: @test_jnf( -// APPROX-NEXT: entry: -// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// APPROX-LABEL: define dso_local float @test_jnf( +// APPROX-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] -// APPROX: if.then.i: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]] -// APPROX: if.then2.i: +// APPROX: [[IF_THEN_I]]: +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] +// APPROX: [[IF_THEN2_I]]: // APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL3JNFIF_EXIT]] -// APPROX: if.end4.i: +// APPROX-NEXT: br label %[[_ZL3JNFIF_EXIT]] +// 
APPROX: [[IF_END4_I]]: // APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]] -// APPROX: for.body.i: -// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] +// APPROX: [[FOR_BODY_I]]: +// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] @@ -2740,32 +3127,33 @@ extern "C" __device__ double test_j1(double x) { // APPROX-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] -// APPROX: _ZL3jnfif.exit: -// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], 
[[FOR_BODY_I]] ] +// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] +// APPROX: [[_ZL3JNFIF_EXIT]]: +// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // APPROX-NEXT: ret float [[RETVAL_0_I]] // -// NCRDIV-LABEL: @test_jnf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// NCRDIV-LABEL: define dso_local float @test_jnf( +// NCRDIV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] -// NCRDIV: if.then.i: -// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]] -// NCRDIV: if.then2.i: +// NCRDIV: [[IF_THEN_I]]: +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] +// NCRDIV: [[IF_THEN2_I]]: // NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL3JNFIF_EXIT]] -// NCRDIV: if.end4.i: +// NCRDIV-NEXT: br label %[[_ZL3JNFIF_EXIT]] +// NCRDIV: [[IF_END4_I]]: // NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// NCRDIV-NEXT: br i1 [[CMP7_I1]], 
label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]] -// NCRDIV: for.body.i: -// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] +// NCRDIV: [[FOR_BODY_I]]: +// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META12]] @@ -2773,32 +3161,33 @@ extern "C" __device__ double test_j1(double x) { // NCRDIV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] -// NCRDIV: _ZL3jnfif.exit: -// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] +// NCRDIV: [[_ZL3JNFIF_EXIT]]: +// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ 
[[SUB_I]], %[[FOR_BODY_I]] ] // NCRDIV-NEXT: ret float [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_jnf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func float @test_jnf( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV: [[IF_THEN_I]]: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label %[[_ZL3JNFIF_EXIT:.*]] +// AMDGCNSPIRV: [[IF_THEN2_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT]] -// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: br label %[[_ZL3JNFIF_EXIT]] +// AMDGCNSPIRV: [[IF_END4_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]] -// 
AMDGCNSPIRV: for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3JNFIF_EXIT]] +// AMDGCNSPIRV: [[FOR_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] @@ -2806,36 +3195,37 @@ extern "C" __device__ double test_j1(double x) { // AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] -// AMDGCNSPIRV: _ZL3jnfif.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3JNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] +// AMDGCNSPIRV: [[_ZL3JNFIF_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ 
[[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]] // extern "C" __device__ float test_jnf(int x, float y) { return jnf(x, y); } -// DEFAULT-LABEL: @test_jn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// DEFAULT-LABEL: define dso_local double @test_jn( +// DEFAULT-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] -// DEFAULT: if.then.i: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL2JNID_EXIT:%.*]] -// DEFAULT: if.then2.i: +// DEFAULT: [[IF_THEN_I]]: +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: br label %[[_ZL2JNID_EXIT:.*]] +// DEFAULT: [[IF_THEN2_I]]: // DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL2JNID_EXIT]] -// DEFAULT: if.end4.i: +// DEFAULT-NEXT: br label %[[_ZL2JNID_EXIT]] +// DEFAULT: [[IF_END4_I]]: // DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] // DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]] -// DEFAULT: for.body.i: -// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] 
-// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] +// DEFAULT: [[FOR_BODY_I]]: +// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -2843,32 +3233,33 @@ extern "C" __device__ float test_jnf(int x, float y) { // DEFAULT-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] -// DEFAULT: _ZL2jnid.exit: -// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] +// DEFAULT: [[_ZL2JNID_EXIT]]: +// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // DEFAULT-NEXT: ret double [[RETVAL_0_I]] // -// FINITEONLY-LABEL: @test_jn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: switch i32 
[[X:%.*]], label [[IF_END4_I:%.*]] [ -// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_jn( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] -// FINITEONLY: if.then.i: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]] -// FINITEONLY-NEXT: br label [[_ZL2JNID_EXIT:%.*]] -// FINITEONLY: if.then2.i: +// FINITEONLY: [[IF_THEN_I]]: +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: br label %[[_ZL2JNID_EXIT:.*]] +// FINITEONLY: [[IF_THEN2_I]]: // FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// FINITEONLY-NEXT: br label [[_ZL2JNID_EXIT]] -// FINITEONLY: if.end4.i: +// FINITEONLY-NEXT: br label %[[_ZL2JNID_EXIT]] +// FINITEONLY: [[IF_END4_I]]: // FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]] -// FINITEONLY: for.body.i: -// 
FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] +// FINITEONLY: [[FOR_BODY_I]]: +// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[CONV_I]], [[Y]] @@ -2876,32 +3267,33 @@ extern "C" __device__ float test_jnf(int x, float y) { // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract double [[MUL8_I]], [[__X0_0_I2]] // FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] -// FINITEONLY: _ZL2jnid.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] +// FINITEONLY: [[_ZL2JNID_EXIT]]: +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], 
%[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret double [[RETVAL_0_I]] // -// APPROX-LABEL: @test_jn( -// APPROX-NEXT: entry: -// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// APPROX-LABEL: define dso_local double @test_jn( +// APPROX-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] -// APPROX: if.then.i: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL2JNID_EXIT:%.*]] -// APPROX: if.then2.i: +// APPROX: [[IF_THEN_I]]: +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: br label %[[_ZL2JNID_EXIT:.*]] +// APPROX: [[IF_THEN2_I]]: // APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL2JNID_EXIT]] -// APPROX: if.end4.i: +// APPROX-NEXT: br label %[[_ZL2JNID_EXIT]] +// APPROX: [[IF_END4_I]]: // APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]] -// APPROX: for.body.i: -// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// 
APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] +// APPROX: [[FOR_BODY_I]]: +// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -2909,32 +3301,33 @@ extern "C" __device__ float test_jnf(int x, float y) { // APPROX-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] -// APPROX: _ZL2jnid.exit: -// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] +// APPROX: [[_ZL2JNID_EXIT]]: +// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // APPROX-NEXT: ret double [[RETVAL_0_I]] // -// NCRDIV-LABEL: @test_jn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// NCRDIV-LABEL: 
define dso_local double @test_jn( +// NCRDIV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] -// NCRDIV: if.then.i: -// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL2JNID_EXIT:%.*]] -// NCRDIV: if.then2.i: +// NCRDIV: [[IF_THEN_I]]: +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: br label %[[_ZL2JNID_EXIT:.*]] +// NCRDIV: [[IF_THEN2_I]]: // NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL2JNID_EXIT]] -// NCRDIV: if.end4.i: +// NCRDIV-NEXT: br label %[[_ZL2JNID_EXIT]] +// NCRDIV: [[IF_END4_I]]: // NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// NCRDIV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]] -// NCRDIV: for.body.i: -// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] +// NCRDIV: [[FOR_BODY_I]]: +// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// 
NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -2942,32 +3335,33 @@ extern "C" __device__ float test_jnf(int x, float y) { // NCRDIV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]] -// NCRDIV: _ZL2jnid.exit: -// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]] +// NCRDIV: [[_ZL2JNID_EXIT]]: +// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // NCRDIV-NEXT: ret double [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_jn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func double @test_jn( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]] 
+// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV: [[IF_THEN_I]]: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label %[[_ZL2JNID_EXIT:.*]] +// AMDGCNSPIRV: [[IF_THEN2_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT]] -// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: br label %[[_ZL2JNID_EXIT]] +// AMDGCNSPIRV: [[IF_END4_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]] -// AMDGCNSPIRV: for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2JNID_EXIT]] +// AMDGCNSPIRV: [[FOR_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: 
[[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -2975,158 +3369,183 @@ extern "C" __device__ float test_jnf(int x, float y) { // AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]] -// AMDGCNSPIRV: _ZL2jnid.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2JNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]] +// AMDGCNSPIRV: [[_ZL2JNID_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]] // extern "C" __device__ double test_jn(int x, double y) { return jn(x, y); } -// DEFAULT-LABEL: @test_ldexpf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_ldexpf( +// DEFAULT-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef 
float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_ldexpf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_ldexpf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X]], i32 [[Y]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_ldexpf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_ldexpf( +// APPROX-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_ldexpf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_ldexpf( +// NCRDIV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_ldexpf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef 
float @test_ldexpf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_ldexpf(float x, int y) { return ldexpf(x, y); } -// DEFAULT-LABEL: @test_ldexp( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_ldexp( +// DEFAULT-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_ldexp( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_ldexp( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X]], i32 [[Y]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_ldexp( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_ldexp( +// APPROX-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call 
contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_ldexp( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_ldexp( +// NCRDIV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_ldexp( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_ldexp( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_ldexp(double x, int y) { return ldexp(x, y); } -// DEFAULT-LABEL: @test_lgammaf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_lgammaf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_lgammaf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float 
@__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_lgammaf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_lgammaf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_lgammaf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_lgammaf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_lgammaf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_lgammaf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_lgammaf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract 
spir_func noundef addrspace(4) float @__ocml_lgamma_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_lgammaf(float x) { return lgammaf(x); } -// DEFAULT-LABEL: @test_lgamma( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_lgamma( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_lgamma( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_lgamma( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_lgamma( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_lgamma( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_lgamma( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_lgamma( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_lgamma( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_lgamma( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_lgamma_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_lgamma(double x) { return lgamma(x); } -// DEFAULT-LABEL: @test_llrintf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_llrintf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// FINITEONLY-LABEL: @test_llrintf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_llrintf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: 
[[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_llrintf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 @test_llrintf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_llrintf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_llrintf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_llrintf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrintf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3134,33 +3553,38 @@ extern "C" __device__ long long int test_llrintf(float x) { return llrintf(x); } -// DEFAULT-LABEL: @test_llrint( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call 
contract double @llvm.rint.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_llrint( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// FINITEONLY-LABEL: @test_llrint( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_llrint( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_llrint( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 @test_llrint( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_llrint( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_llrint( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // 
-// AMDGCNSPIRV-LABEL: @test_llrint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrint( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3168,33 +3592,38 @@ extern "C" __device__ long long int test_llrint(double x) { return llrint(x); } -// DEFAULT-LABEL: @test_llroundf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_llroundf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// FINITEONLY-LABEL: @test_llroundf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_llroundf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_llroundf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 @test_llroundf( +// 
APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_llroundf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_llroundf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_llroundf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llroundf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3202,33 +3631,38 @@ extern "C" __device__ long long int test_llroundf(float x) { return llroundf(x); } -// DEFAULT-LABEL: @test_llround( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_llround( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// FINITEONLY-LABEL: 
@test_llround( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_llround( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_llround( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 @test_llround( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_llround( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_llround( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_llround( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llround( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call 
contract addrspace(4) double @llvm.round.f64(double [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3236,294 +3670,344 @@ extern "C" __device__ long long int test_llround(double x) { return llround(x); } -// DEFAULT-LABEL: @test_log10f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_log10f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_log10f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_log10f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_log10f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_log10f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_log10f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_log10f( +// 
NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_log10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_log10f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_log10f(float x) { return log10f(x); } -// DEFAULT-LABEL: @test_log10( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_log10( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_log10( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log10( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // 
FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_log10( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_log10( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_log10( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_log10( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log10( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log10( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log10_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_log10(double x) { return log10(x); } -// DEFAULT-LABEL: @test_log1pf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_log1pf( +// 
DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_log1pf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_log1pf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_log1pf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_log1pf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_log1pf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_log1pf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log1pf( -// AMDGCNSPIRV-NEXT: 
entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_log1pf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log1p_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_log1pf(float x) { return log1pf(x); } -// DEFAULT-LABEL: @test_log1p( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_log1p( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_log1p( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log1p( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_log1p( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// 
APPROX-LABEL: define dso_local noundef double @test_log1p( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_log1p( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_log1p( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log1p( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log1p( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log1p_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_log1p(double x) { return log1p(x); } -// DEFAULT-LABEL: @test_log2f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_log2f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: 
@test_log2f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log2.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_log2f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log2.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_log2f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_log2f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_log2f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_log2f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log2.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_log2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log2.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_log2f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log2.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // 
extern "C" __device__ float test_log2f(float x) { return log2f(x); } -// DEFAULT-LABEL: @test_log2( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_log2( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_log2( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_log2( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_log2( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_log2( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_log2( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_log2( 
+// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log2( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_log2( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log2_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_log2(double x) { return log2(x); } -// DEFAULT-LABEL: @test_logbf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_logbf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_logbf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_logbf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float 
@__ocml_logb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_logbf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_logbf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_logbf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_logbf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_logbf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_logbf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_logbf(float x) { return logbf(x); } -// DEFAULT-LABEL: @test_logb( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local 
noundef double @test_logb( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_logb( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_logb( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_logb( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_logb( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_logb( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_logb( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// 
AMDGCNSPIRV-LABEL: @test_logb( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_logb( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_logb_f64(double noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_logb(double x) { return logb(x); } -// DEFAULT-LABEL: @test_logf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_logf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_logf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_logf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_logf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_logf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: 
[[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_logf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_logf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_logf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_logf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_logf(float x) { return logf(x); } -// DEFAULT-LABEL: @test_lrintf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_lrintf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// FINITEONLY-LABEL: @test_lrintf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_lrintf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_lrintf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 @test_lrintf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_lrintf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_lrintf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lrintf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrintf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3531,33 +4015,38 @@ extern "C" __device__ long int test_lrintf(float x) { return lrintf(x); } -// DEFAULT-LABEL: @test_lrint( -// DEFAULT-NEXT: entry: -// 
DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_lrint( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// FINITEONLY-LABEL: @test_lrint( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_lrint( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_lrint( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 @test_lrint( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_lrint( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_lrint( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // 
NCRDIV-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lrint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrint( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3565,33 +4054,38 @@ extern "C" __device__ long int test_lrint(double x) { return lrint(x); } -// DEFAULT-LABEL: @test_lroundf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_lroundf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// FINITEONLY-LABEL: @test_lroundf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_lroundf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_lroundf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 
@test_lroundf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_lroundf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_lroundf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lroundf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lroundf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3599,33 +4093,38 @@ extern "C" __device__ long int test_lroundf(float x) { return lroundf(x); } -// DEFAULT-LABEL: @test_lround( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local i64 @test_lround( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) // DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // -// 
FINITEONLY-LABEL: @test_lround( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local i64 @test_lround( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // -// APPROX-LABEL: @test_lround( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local i64 @test_lround( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// NCRDIV-LABEL: @test_lround( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local i64 @test_lround( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) // NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lround( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lround( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = 
tail call contract addrspace(4) double @llvm.round.f64(double [[X]]) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // @@ -3633,54 +4132,59 @@ extern "C" __device__ long int test_lround(double x) { return lround(x); } -// DEFAULT-LABEL: @test_modff( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef float @test_modff( +// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]] -// DEFAULT-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]] +// DEFAULT-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_modff( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_modff( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // 
FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]] -// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]] +// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_modff( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef float @test_modff( +// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]] -// APPROX-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] 
+// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16:![0-9]+]] +// APPROX-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_modff( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef float @test_modff( +// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17:![0-9]+]] -// NCRDIV-NEXT: store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17:![0-9]+]] +// NCRDIV-NEXT: store float [[TMP0]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_modff( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_modff( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, 
align 4 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15:[0-9]+]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_modf_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17:![0-9]+]] -// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_modf_f32(float noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[FLOAT_TBAA17:![0-9]+]] +// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // @@ -3688,54 +4192,59 @@ extern "C" __device__ float test_modff(float x, float* y) { return modff(x, y); } -// DEFAULT-LABEL: @test_modf( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef double @test_modf( +// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]] -// 
DEFAULT-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]] +// DEFAULT-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_modf( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_modf( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]] -// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]] +// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // 
FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_modf( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef double @test_modf( +// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]] -// APPROX-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18:![0-9]+]] +// APPROX-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_modf( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef double @test_modf( +// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) 
[[__TMP_I]], align 8, !tbaa [[TBAA19:![0-9]+]] -// NCRDIV-NEXT: store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19:![0-9]+]] +// NCRDIV-NEXT: store double [[TMP0]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_modf( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_modf( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_modf_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19:![0-9]+]] -// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_modf_f64(double noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[DOUBLE_TBAA19:![0-9]+]] +// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Y]], align 8, 
!tbaa [[DOUBLE_TBAA19]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // @@ -3743,325 +4252,330 @@ extern "C" __device__ double test_modf(double x, double* y) { return modf(x, y); } -// DEFAULT-LABEL: @test_nanf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// DEFAULT-LABEL: define dso_local float @test_nanf( +// DEFAULT-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// DEFAULT-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]] -// DEFAULT: while.cond.i14.i.i.preheader: -// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] +// DEFAULT: [[WHILE_COND_I14_I_I_PREHEADER]]: +// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]] -// DEFAULT: if.then.i.i: +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] +// DEFAULT: [[IF_THEN_I_I]]: // DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// DEFAULT-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [ -// DEFAULT-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// DEFAULT-NEXT: i8 88, label [[IF_THEN5_I_I]] +// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], 
align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ +// DEFAULT-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// DEFAULT-NEXT: i8 88, label %[[IF_THEN5_I_I]] // DEFAULT-NEXT: ] -// DEFAULT: while.cond.i.i.i.preheader: -// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT: [[WHILE_COND_I_I_I_PREHEADER]]: +// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// DEFAULT: if.then5.i.i: -// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] +// DEFAULT: [[IF_THEN5_I_I]]: +// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]] -// DEFAULT: while.body.i31.i.i: -// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ] -// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] +// DEFAULT: [[WHILE_BODY_I31_I_I]]: +// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ] +// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] 
] +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // DEFAULT-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48 // DEFAULT-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10 -// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// DEFAULT: if.else.i.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// DEFAULT: [[IF_ELSE_I_I_I]]: // DEFAULT-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97 // DEFAULT-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6 -// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// DEFAULT: if.else17.i.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// DEFAULT: [[IF_ELSE17_I_I_I]]: // DEFAULT-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65 // DEFAULT-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6 -// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]] -// DEFAULT: if.end31.i.i.i: -// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// DEFAULT: [[IF_END31_I_I_I]]: +// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ] // DEFAULT-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4 // DEFAULT-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64 // DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // DEFAULT-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// DEFAULT-NEXT: 
[[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] -// DEFAULT: while.body.i.i.i: -// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// DEFAULT: [[WHILE_BODY_I_I_I]]: +// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8 // DEFAULT-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48 -// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]] -// DEFAULT: if.then.i.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// DEFAULT: [[IF_THEN_I_I_I]]: // DEFAULT-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3 // DEFAULT-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64 // DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // DEFAULT-NEXT: [[SUB_I_I_I]] 
= add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] -// DEFAULT: while.body.i18.i.i: -// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// DEFAULT: [[WHILE_BODY_I18_I_I]]: +// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48 // DEFAULT-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10 -// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]] -// DEFAULT: if.then.i21.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// DEFAULT: [[IF_THEN_I21_I_I]]: // DEFAULT-NEXT: [[MUL_I22_I_I:%.*]] = 
mul i64 [[__R_0_I16_I_I7]], 10 // DEFAULT-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64 // DEFAULT-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // DEFAULT-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] -// DEFAULT: _ZL4nanfPKc.exit: -// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// DEFAULT: [[_ZL4NANFPKC_EXIT]]: +// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 // DEFAULT-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 // DEFAULT-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 // DEFAULT-NEXT: [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float // DEFAULT-NEXT: ret float [[TMP16]] // -// 
FINITEONLY-LABEL: @test_nanf( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_nanf( +// FINITEONLY-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret float poison // -// APPROX-LABEL: @test_nanf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// APPROX-LABEL: define dso_local float @test_nanf( +// APPROX-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// APPROX-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]] -// APPROX: while.cond.i14.i.i.preheader: -// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] +// APPROX: [[WHILE_COND_I14_I_I_PREHEADER]]: +// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]] -// APPROX: if.then.i.i: +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] +// APPROX: [[IF_THEN_I_I]]: // APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// APPROX-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [ -// APPROX-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// APPROX-NEXT: i8 88, label [[IF_THEN5_I_I]] +// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], 
align 1, !tbaa [[CHAR_TBAA4]] +// APPROX-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ +// APPROX-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// APPROX-NEXT: i8 88, label %[[IF_THEN5_I_I]] // APPROX-NEXT: ] -// APPROX: while.cond.i.i.i.preheader: -// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX: [[WHILE_COND_I_I_I_PREHEADER]]: +// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// APPROX: if.then5.i.i: -// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] +// APPROX: [[IF_THEN5_I_I]]: +// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]] -// APPROX: while.body.i31.i.i: -// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ] -// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// APPROX-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] +// APPROX: [[WHILE_BODY_I31_I_I]]: +// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ] +// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ] +// APPROX-NEXT: 
[[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // APPROX-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48 // APPROX-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10 -// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// APPROX: if.else.i.i.i: +// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// APPROX: [[IF_ELSE_I_I_I]]: // APPROX-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97 // APPROX-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6 -// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// APPROX: if.else17.i.i.i: +// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// APPROX: [[IF_ELSE17_I_I_I]]: // APPROX-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65 // APPROX-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6 -// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]] -// APPROX: if.end31.i.i.i: -// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// APPROX: [[IF_END31_I_I_I]]: +// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ] // APPROX-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4 // APPROX-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64 // APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // APPROX-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], 
align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] -// APPROX: while.body.i.i.i: -// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// APPROX: [[WHILE_BODY_I_I_I]]: +// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // APPROX-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8 // APPROX-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48 -// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]] -// APPROX: if.then.i.i.i: +// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// APPROX: [[IF_THEN_I_I_I]]: // APPROX-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3 // APPROX-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64 // APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // APPROX-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // APPROX-NEXT: 
[[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] -// APPROX: while.body.i18.i.i: -// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// APPROX: [[WHILE_BODY_I18_I_I]]: +// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // APPROX-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48 // APPROX-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10 -// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]] -// APPROX: if.then.i21.i.i: +// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// APPROX: [[IF_THEN_I21_I_I]]: // APPROX-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10 // APPROX-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg 
i8 [[TMP13]] to i64 // APPROX-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // APPROX-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] -// APPROX: _ZL4nanfPKc.exit: -// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// APPROX: [[_ZL4NANFPKC_EXIT]]: +// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 // APPROX-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 // APPROX-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 // APPROX-NEXT: [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float // APPROX-NEXT: ret float [[TMP16]] // -// NCRDIV-LABEL: @test_nanf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr 
[[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// NCRDIV-LABEL: define dso_local float @test_nanf( +// NCRDIV-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// NCRDIV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]] -// NCRDIV: while.cond.i14.i.i.preheader: -// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] +// NCRDIV: [[WHILE_COND_I14_I_I_PREHEADER]]: +// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]] -// NCRDIV: if.then.i.i: +// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] +// NCRDIV: [[IF_THEN_I_I]]: // NCRDIV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// NCRDIV-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [ -// NCRDIV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// NCRDIV-NEXT: i8 88, label [[IF_THEN5_I_I]] +// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ +// NCRDIV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// NCRDIV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // NCRDIV-NEXT: ] -// NCRDIV: while.cond.i.i.i.preheader: -// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV: [[WHILE_COND_I_I_I_PREHEADER]]: +// 
NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// NCRDIV: if.then5.i.i: -// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] +// NCRDIV: [[IF_THEN5_I_I]]: +// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]] -// NCRDIV: while.body.i31.i.i: -// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ] -// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] +// NCRDIV: [[WHILE_BODY_I31_I_I]]: +// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ] +// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ] +// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // NCRDIV-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48 // NCRDIV-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10 -// NCRDIV-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// NCRDIV: if.else.i.i.i: +// NCRDIV-NEXT: br i1 
[[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// NCRDIV: [[IF_ELSE_I_I_I]]: // NCRDIV-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97 // NCRDIV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6 -// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// NCRDIV: if.else17.i.i.i: +// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// NCRDIV: [[IF_ELSE17_I_I_I]]: // NCRDIV-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65 // NCRDIV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6 -// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]] -// NCRDIV: if.end31.i.i.i: -// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// NCRDIV: [[IF_END31_I_I_I]]: +// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ] // NCRDIV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4 // NCRDIV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64 // NCRDIV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // NCRDIV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] -// NCRDIV: while.body.i.i.i: -// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], 
[[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// NCRDIV: [[WHILE_BODY_I_I_I]]: +// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8 // NCRDIV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48 -// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]] -// NCRDIV: if.then.i.i.i: +// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// NCRDIV: [[IF_THEN_I_I_I]]: // NCRDIV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3 // NCRDIV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64 // NCRDIV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // NCRDIV-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], 
label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] -// NCRDIV: while.body.i18.i.i: -// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// NCRDIV: [[WHILE_BODY_I18_I_I]]: +// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48 // NCRDIV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10 -// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]] -// NCRDIV: if.then.i21.i.i: +// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// NCRDIV: [[IF_THEN_I21_I_I]]: // NCRDIV-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10 // NCRDIV-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64 // NCRDIV-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // NCRDIV-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: [[TMP15]] = load i8, ptr 
[[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] -// NCRDIV: _ZL4nanfPKc.exit: -// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// NCRDIV: [[_ZL4NANFPKC_EXIT]]: +// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // NCRDIV-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 // NCRDIV-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 // NCRDIV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 // NCRDIV-NEXT: [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float // NCRDIV-NEXT: ret float [[TMP16]] // -// AMDGCNSPIRV-LABEL: @test_nanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-LABEL: define spir_func float @test_nanf( +// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: 
[[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] -// AMDGCNSPIRV: if.then.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I:.*]] +// AMDGCNSPIRV: [[IF_THEN_I_I]]: // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// AMDGCNSPIRV-NEXT: i8 88, label [[IF_THEN5_I_I]] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I_I:.*]] [ +// AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then5.i.i: -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV: [[IF_THEN5_I_I]]: +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]] -// AMDGCNSPIRV: while.body.i32.i.i: -// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], 
label %[[_ZL4NANFPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I32_I_I]]: +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP2]], %[[IF_THEN5_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP4]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// AMDGCNSPIRV: if.else.i.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE_I_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], -97 // AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// AMDGCNSPIRV: if.else17.i.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE17_I_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = add i8 [[TMP3]], -65 // AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP6]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]] -// AMDGCNSPIRV: if.end31.i.i.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL4NANFPKC_EXIT]] +// AMDGCNSPIRV: [[IF_END31_I_I_I]]: +// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I32_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, 
%[[IF_ELSE17_I_I_I]] ] // AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I7]], 4 // AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64 // AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]] -// AMDGCNSPIRV: while.cond.i.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV: [[WHILE_COND_I_I_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I:.*]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I]] ], [ 0, %[[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// AMDGCNSPIRV-NEXT: br 
i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]] -// AMDGCNSPIRV: while.body.i.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I_I_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], -8 // AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 48 // AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 @@ -4071,14 +4585,14 @@ extern "C" __device__ double test_modf(double x, double* y) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP8]] -// AMDGCNSPIRV: while.cond.i14.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV: [[WHILE_COND_I14_I_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], %[[WHILE_BODY_I18_I_I:.*]] ], [ [[TAG]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], %[[WHILE_BODY_I18_I_I]] ], [ 0, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, 
!tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]] -// AMDGCNSPIRV: while.body.i18.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL4NANFPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I18_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = add i8 [[TMP10]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP11]], 10 // AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 @@ -4088,9 +4602,9 @@ extern "C" __device__ double test_modf(double x, double* y) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] -// AMDGCNSPIRV: _ZL4nanfPKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV: [[_ZL4NANFPKC_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], %[[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], %[[WHILE_COND_I14_I_I]] ] // AMDGCNSPIRV-NEXT: 
[[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 // AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 // AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 @@ -4101,322 +4615,327 @@ extern "C" __device__ float test_nanf(const char *tag) { return nanf(tag); } -// DEFAULT-LABEL: @test_nan( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// DEFAULT-LABEL: define dso_local double @test_nan( +// DEFAULT-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// DEFAULT-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]] -// DEFAULT: while.cond.i14.i.i.preheader: -// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] +// DEFAULT: [[WHILE_COND_I14_I_I_PREHEADER]]: +// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]] -// DEFAULT: if.then.i.i: +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] +// DEFAULT: [[IF_THEN_I_I]]: // DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// DEFAULT-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [ -// DEFAULT-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// DEFAULT-NEXT: i8 88, label [[IF_THEN5_I_I]] +// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr 
[[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// DEFAULT-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ +// DEFAULT-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// DEFAULT-NEXT: i8 88, label %[[IF_THEN5_I_I]] // DEFAULT-NEXT: ] -// DEFAULT: while.cond.i.i.i.preheader: -// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT: [[WHILE_COND_I_I_I_PREHEADER]]: +// DEFAULT-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// DEFAULT: if.then5.i.i: -// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] +// DEFAULT: [[IF_THEN5_I_I]]: +// DEFAULT-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]] -// DEFAULT: while.body.i31.i.i: -// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ] -// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] +// DEFAULT: [[WHILE_BODY_I31_I_I]]: +// DEFAULT-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ] +// DEFAULT-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, 
%[[IF_THEN5_I_I]] ] +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // DEFAULT-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48 // DEFAULT-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10 -// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// DEFAULT: if.else.i.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// DEFAULT: [[IF_ELSE_I_I_I]]: // DEFAULT-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97 // DEFAULT-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6 -// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// DEFAULT: if.else17.i.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// DEFAULT: [[IF_ELSE17_I_I_I]]: // DEFAULT-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65 // DEFAULT-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6 -// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]] -// DEFAULT: if.end31.i.i.i: -// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]] +// DEFAULT: [[IF_END31_I_I_I]]: +// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ] // DEFAULT-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4 // DEFAULT-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64 // DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // DEFAULT-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// 
DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] -// DEFAULT: while.body.i.i.i: -// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// DEFAULT: [[WHILE_BODY_I_I_I]]: +// DEFAULT-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8 // DEFAULT-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48 -// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]] -// DEFAULT: if.then.i.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL3NANPKC_EXIT]] +// DEFAULT: [[IF_THEN_I_I_I]]: // DEFAULT-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3 // DEFAULT-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64 // DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // DEFAULT-NEXT: 
[[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] -// DEFAULT: while.body.i18.i.i: -// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ] -// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// DEFAULT: [[WHILE_BODY_I18_I_I]]: +// DEFAULT-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // DEFAULT-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48 // DEFAULT-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10 -// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]] -// DEFAULT: if.then.i21.i.i: +// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL3NANPKC_EXIT]] +// DEFAULT: [[IF_THEN_I21_I_I]]: // DEFAULT-NEXT: 
[[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10 // DEFAULT-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64 // DEFAULT-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // DEFAULT-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] -// DEFAULT: _ZL3nanPKc.exit: -// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ] +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// DEFAULT: [[_ZL3NANPKC_EXIT]]: +// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // DEFAULT-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 // DEFAULT-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 // DEFAULT-NEXT: [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double // DEFAULT-NEXT: ret double [[TMP16]] // -// FINITEONLY-LABEL: @test_nan( 
-// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_nan( +// FINITEONLY-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret double poison // -// APPROX-LABEL: @test_nan( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// APPROX-LABEL: define dso_local double @test_nan( +// APPROX-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// APPROX-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]] -// APPROX: while.cond.i14.i.i.preheader: -// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] +// APPROX: [[WHILE_COND_I14_I_I_PREHEADER]]: +// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]] -// APPROX: if.then.i.i: +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] +// APPROX: [[IF_THEN_I_I]]: // APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// APPROX-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [ -// APPROX-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// APPROX-NEXT: i8 88, label [[IF_THEN5_I_I]] +// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] 
+// APPROX-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ +// APPROX-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// APPROX-NEXT: i8 88, label %[[IF_THEN5_I_I]] // APPROX-NEXT: ] -// APPROX: while.cond.i.i.i.preheader: -// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX: [[WHILE_COND_I_I_I_PREHEADER]]: +// APPROX-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// APPROX: if.then5.i.i: -// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] +// APPROX: [[IF_THEN5_I_I]]: +// APPROX-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]] -// APPROX: while.body.i31.i.i: -// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ] -// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// APPROX-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] +// APPROX: [[WHILE_BODY_I31_I_I]]: +// APPROX-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ] +// APPROX-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ] +// APPROX-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi 
ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // APPROX-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48 // APPROX-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10 -// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// APPROX: if.else.i.i.i: +// APPROX-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// APPROX: [[IF_ELSE_I_I_I]]: // APPROX-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97 // APPROX-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6 -// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// APPROX: if.else17.i.i.i: +// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// APPROX: [[IF_ELSE17_I_I_I]]: // APPROX-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65 // APPROX-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6 -// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]] -// APPROX: if.end31.i.i.i: -// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]] +// APPROX: [[IF_END31_I_I_I]]: +// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ] // APPROX-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4 // APPROX-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64 // APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // APPROX-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]] +// 
APPROX-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] -// APPROX: while.body.i.i.i: -// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// APPROX: [[WHILE_BODY_I_I_I]]: +// APPROX-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // APPROX-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8 // APPROX-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48 -// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]] -// APPROX: if.then.i.i.i: +// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL3NANPKC_EXIT]] +// APPROX: [[IF_THEN_I_I_I]]: // APPROX-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3 // APPROX-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64 // APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // APPROX-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds 
nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] -// APPROX: while.body.i18.i.i: -// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ] -// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// APPROX: [[WHILE_BODY_I18_I_I]]: +// APPROX-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // APPROX-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48 // APPROX-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10 -// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]] -// APPROX: if.then.i21.i.i: +// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL3NANPKC_EXIT]] +// APPROX: [[IF_THEN_I21_I_I]]: // APPROX-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10 // APPROX-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64 // APPROX-NEXT: 
[[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // APPROX-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // APPROX-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] -// APPROX: _ZL3nanPKc.exit: -// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ] +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// APPROX: [[_ZL3NANPKC_EXIT]]: +// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // APPROX-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 // APPROX-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 // APPROX-NEXT: [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double // APPROX-NEXT: ret double [[TMP16]] // -// NCRDIV-LABEL: @test_nan( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// NCRDIV-LABEL: define dso_local double @test_nan( 
+// NCRDIV-SAME: ptr noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr #[[ATTR2]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// NCRDIV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]] -// NCRDIV: while.cond.i14.i.i.preheader: -// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I_PREHEADER:.*]] +// NCRDIV: [[WHILE_COND_I14_I_I_PREHEADER]]: +// NCRDIV-NEXT: [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]] -// NCRDIV: if.then.i.i: +// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I18_I_I:.*]] +// NCRDIV: [[IF_THEN_I_I]]: // NCRDIV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 -// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// NCRDIV-NEXT: switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [ -// NCRDIV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// NCRDIV-NEXT: i8 88, label [[IF_THEN5_I_I]] +// NCRDIV-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] +// NCRDIV-NEXT: switch i8 [[TMP2]], label %[[WHILE_COND_I_I_I_PREHEADER:.*]] [ +// NCRDIV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// NCRDIV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // NCRDIV-NEXT: ] -// NCRDIV: while.cond.i.i.i.preheader: -// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV: [[WHILE_COND_I_I_I_PREHEADER]]: +// NCRDIV-NEXT: [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // 
NCRDIV-NEXT: [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// NCRDIV: if.then5.i.i: -// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I14]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I:.*]] +// NCRDIV: [[IF_THEN5_I_I]]: +// NCRDIV-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]] -// NCRDIV: while.body.i31.i.i: -// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ] -// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I9]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I:.*]] +// NCRDIV: [[WHILE_BODY_I31_I_I]]: +// NCRDIV-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], %[[IF_END31_I_I_I:.*]] ], [ [[TMP4]], %[[IF_THEN5_I_I]] ] +// NCRDIV-NEXT: [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ] +// NCRDIV-NEXT: [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // NCRDIV-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], -48 // NCRDIV-NEXT: [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10 -// NCRDIV-NEXT: br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// NCRDIV: if.else.i.i.i: +// NCRDIV-NEXT: br i1 [[OR_COND_I32_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// NCRDIV: [[IF_ELSE_I_I_I]]: 
// NCRDIV-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], -97 // NCRDIV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6 -// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// NCRDIV: if.else17.i.i.i: +// NCRDIV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// NCRDIV: [[IF_ELSE17_I_I_I]]: // NCRDIV-NEXT: [[TMP8:%.*]] = add i8 [[TMP5]], -65 // NCRDIV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6 -// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]] -// NCRDIV: if.end31.i.i.i: -// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// NCRDIV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]] +// NCRDIV: [[IF_END31_I_I_I]]: +// NCRDIV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I31_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ] // NCRDIV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4 // NCRDIV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64 // NCRDIV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // NCRDIV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1 -// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] -// NCRDIV: while.body.i.i.i: -// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 
[ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I30_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]] +// NCRDIV: [[WHILE_BODY_I_I_I]]: +// NCRDIV-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], %[[IF_THEN_I_I_I:.*]] ], [ [[TMP3]], %[[WHILE_COND_I_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], %[[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[WHILE_COND_I_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], -8 // NCRDIV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48 -// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]] -// NCRDIV: if.then.i.i.i: +// NCRDIV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[IF_THEN_I_I_I]], label %[[_ZL3NANPKC_EXIT]] +// NCRDIV: [[IF_THEN_I_I_I]]: // NCRDIV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3 // NCRDIV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64 // NCRDIV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // NCRDIV-NEXT: [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1 -// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0 -// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] -// NCRDIV: while.body.i18.i.i: -// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ 
[[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ] -// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]] +// NCRDIV: [[WHILE_BODY_I18_I_I]]: +// NCRDIV-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], %[[IF_THEN_I21_I_I:.*]] ], [ [[TMP1]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ] +// NCRDIV-NEXT: [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], %[[IF_THEN_I21_I_I]] ], [ [[TAG]], %[[WHILE_COND_I14_I_I_PREHEADER]] ] // NCRDIV-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], -48 // NCRDIV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10 -// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]] -// NCRDIV: if.then.i21.i.i: +// NCRDIV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[IF_THEN_I21_I_I]], label %[[_ZL3NANPKC_EXIT]] +// NCRDIV: [[IF_THEN_I21_I_I]]: // NCRDIV-NEXT: [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10 // NCRDIV-NEXT: [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64 // NCRDIV-NEXT: [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48 // NCRDIV-NEXT: [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1 -// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]] +// NCRDIV-NEXT: [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[CHAR_TBAA4]] // NCRDIV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0 -// 
NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] -// NCRDIV: _ZL3nanPKc.exit: -// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ] +// NCRDIV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]] +// NCRDIV: [[_ZL3NANPKC_EXIT]]: +// NCRDIV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], %[[IF_THEN_I_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], %[[IF_THEN_I21_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ] // NCRDIV-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 // NCRDIV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 // NCRDIV-NEXT: [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double // NCRDIV-NEXT: ret double [[TMP16]] // -// AMDGCNSPIRV-LABEL: @test_nan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-LABEL: define spir_func double @test_nan( +// AMDGCNSPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[TAG:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] -// AMDGCNSPIRV: if.then.i.i: +// 
AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label %[[IF_THEN_I_I:.*]], label %[[WHILE_COND_I14_I_I:.*]] +// AMDGCNSPIRV: [[IF_THEN_I_I]]: // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i8 120, label [[IF_THEN5_I_I:%.*]] -// AMDGCNSPIRV-NEXT: i8 88, label [[IF_THEN5_I_I]] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] +// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label %[[WHILE_COND_I_I_I:.*]] [ +// AMDGCNSPIRV-NEXT: i8 120, label %[[IF_THEN5_I_I:.*]] +// AMDGCNSPIRV-NEXT: i8 88, label %[[IF_THEN5_I_I]] // AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then5.i.i: -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV: [[IF_THEN5_I_I]]: +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]] -// AMDGCNSPIRV: while.body.i32.i.i: -// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I5]], label %[[_ZL3NANPKC_EXIT:.*]], label %[[WHILE_BODY_I32_I_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I32_I_I]]: +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], 
%[[IF_END31_I_I_I:.*]] ], [ [[TMP2]], %[[IF_THEN5_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_THEN5_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], %[[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN5_I_I]] ] // AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP4]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]] -// AMDGCNSPIRV: if.else.i.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE_I_I_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE_I_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], -97 // AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// AMDGCNSPIRV: if.else17.i.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label %[[IF_END31_I_I_I]], label %[[IF_ELSE17_I_I_I:.*]] +// AMDGCNSPIRV: [[IF_ELSE17_I_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = add i8 [[TMP3]], -65 // AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP6]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]] -// AMDGCNSPIRV: if.end31.i.i.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label %[[IF_END31_I_I_I]], label %[[_ZL3NANPKC_EXIT]] +// AMDGCNSPIRV: [[IF_END31_I_I_I]]: +// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, %[[WHILE_BODY_I32_I_I]] ], [ -87, %[[IF_ELSE_I_I_I]] ], [ -55, %[[IF_ELSE17_I_I_I]] ] // AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I7]], 4 // AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64 // 
AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // AMDGCNSPIRV-NEXT: [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]] -// AMDGCNSPIRV: while.cond.i.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV: [[WHILE_COND_I_I_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I:.*]] ], [ [[INCDEC_PTR_I_I]], %[[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], %[[WHILE_BODY_I_I_I]] ], [ 0, %[[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]] -// AMDGCNSPIRV: while.body.i.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label 
%[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I_I_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], -8 // AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 48 // AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 @@ -4426,14 +4945,14 @@ extern "C" __device__ float test_nanf(const char *tag) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP8]] -// AMDGCNSPIRV: while.cond.i14.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label %[[WHILE_COND_I_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV: [[WHILE_COND_I14_I_I]]: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], %[[WHILE_BODY_I18_I_I:.*]] ], [ [[TAG]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], %[[WHILE_BODY_I18_I_I]] ], [ 0, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[CHAR_TBAA5]] // AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label 
[[WHILE_BODY_I18_I_I]] -// AMDGCNSPIRV: while.body.i18.i.i: +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label %[[_ZL3NANPKC_EXIT]], label %[[WHILE_BODY_I18_I_I]] +// AMDGCNSPIRV: [[WHILE_BODY_I18_I_I]]: // AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = add i8 [[TMP10]], -48 // AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP11]], 10 // AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 @@ -4443,9 +4962,9 @@ extern "C" __device__ float test_nanf(const char *tag) { // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64 // AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] // AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] -// AMDGCNSPIRV: _ZL3nanPKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label %[[WHILE_COND_I14_I_I]], label %[[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV: [[_ZL3NANPKC_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, %[[IF_THEN5_I_I]] ], [ 0, %[[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], %[[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], %[[IF_END31_I_I_I]] ], [ 0, %[[IF_ELSE17_I_I_I]] ], [ 0, %[[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], %[[WHILE_COND_I14_I_I]] ] // AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 // AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 // AMDGCNSPIRV-NEXT: [[TMP12:%.*]] 
= bitcast i64 [[BF_SET9_I]] to double @@ -4455,958 +4974,1093 @@ extern "C" __device__ double test_nan(const char *tag) { return nan(tag); } -// DEFAULT-LABEL: @test_nanf_emptystr( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef float @test_nanf_emptystr( +// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: ret float 0x7FF8000000000000 // -// FINITEONLY-LABEL: @test_nanf_emptystr( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_nanf_emptystr( +// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret float poison // -// APPROX-LABEL: @test_nanf_emptystr( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef float @test_nanf_emptystr( +// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: ret float 0x7FF8000000000000 // -// NCRDIV-LABEL: @test_nanf_emptystr( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef float @test_nanf_emptystr( +// NCRDIV-SAME: ) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: ret float 0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nanf_emptystr( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nanf_emptystr( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000 // extern "C" __device__ float test_nanf_emptystr() { return nanf(""); } -// DEFAULT-LABEL: @test_nan_emptystr( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef double @test_nan_emptystr( +// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: ret double 0x7FF8000000000000 // -// FINITEONLY-LABEL: @test_nan_emptystr( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) 
double @test_nan_emptystr( +// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret double poison // -// APPROX-LABEL: @test_nan_emptystr( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef double @test_nan_emptystr( +// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: ret double 0x7FF8000000000000 // -// NCRDIV-LABEL: @test_nan_emptystr( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef double @test_nan_emptystr( +// NCRDIV-SAME: ) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: ret double 0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nan_emptystr( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nan_emptystr( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000 // extern "C" __device__ double test_nan_emptystr() { return nan(""); } -// DEFAULT-LABEL: @test_nanf_fill( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef float @test_nanf_fill( +// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: ret float 0x7FF8000000000000 // -// FINITEONLY-LABEL: @test_nanf_fill( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_nanf_fill( +// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret float poison // -// APPROX-LABEL: @test_nanf_fill( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef float @test_nanf_fill( +// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: ret float 0x7FF8000000000000 // -// NCRDIV-LABEL: @test_nanf_fill( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef float @test_nanf_fill( +// NCRDIV-SAME: ) 
local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: ret float 0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nanf_fill( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nanf_fill( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000 // extern "C" __device__ float test_nanf_fill() { return nanf("0x456"); } -// DEFAULT-LABEL: @test_nan_fill( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef double @test_nan_fill( +// DEFAULT-SAME: ) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: ret double 0x7FF8000000000000 // -// FINITEONLY-LABEL: @test_nan_fill( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_nan_fill( +// FINITEONLY-SAME: ) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret double poison // -// APPROX-LABEL: @test_nan_fill( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef double @test_nan_fill( +// APPROX-SAME: ) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: ret double 0x7FF8000000000000 // -// NCRDIV-LABEL: @test_nan_fill( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef double @test_nan_fill( +// NCRDIV-SAME: ) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: ret double 0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nan_fill( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nan_fill( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000 // extern "C" __device__ double test_nan_fill() { return nan("0x123"); } -// DEFAULT-LABEL: @test_nearbyintf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail 
call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_nearbyintf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_nearbyintf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.nearbyint.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_nearbyintf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.nearbyint.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_nearbyintf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_nearbyintf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_nearbyintf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_nearbyintf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_nearbyintf( -// AMDGCNSPIRV-NEXT: entry: -// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.nearbyint.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nearbyintf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.nearbyint.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_nearbyintf(float x) { return nearbyintf(x); } -// DEFAULT-LABEL: @test_nearbyint( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_nearbyint( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_nearbyint( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.nearbyint.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_nearbyint( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.nearbyint.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_nearbyint( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_nearbyint( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: 
[[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_nearbyint( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_nearbyint( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_nearbyint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.nearbyint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nearbyint( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.nearbyint.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_nearbyint(double x) { return nearbyint(x); } -// DEFAULT-LABEL: @test_nextafterf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_nextafterf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_nextafterf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float 
@__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_nextafterf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_nextafterf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_nextafterf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_nextafterf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_nextafterf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_nextafterf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float 
@__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_nextafterf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_nextafter_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_nextafterf(float x, float y) { return nextafterf(x, y); } -// DEFAULT-LABEL: @test_nextafter( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_nextafter( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_nextafter( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_nextafter( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] 
// FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_nextafter( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_nextafter( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_nextafter( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_nextafter( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_nextafter( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_nextafter( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_nextafter_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_nextafter(double x, 
double y) { return nextafter(x, y); } -// DEFAULT-LABEL: @test_norm3df( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_norm3df( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_norm3df( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_norm3df( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_norm3df( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_norm3df( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef 
[[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_norm3df( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_norm3df( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm3df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_norm3df( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_norm3df(float x, float y, float z) { return norm3df(x, y, z); } -// DEFAULT-LABEL: @test_norm3d( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]] +// 
DEFAULT-LABEL: define dso_local noundef double @test_norm3d( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_norm3d( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_norm3d( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_norm3d( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_norm3d( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] // APPROX-NEXT: ret 
double [[CALL_I]] // -// NCRDIV-LABEL: @test_norm3d( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_norm3d( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm3d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_norm3d( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_norm3d(double x, double y, double z) { return norm3d(x, y, z); } -// DEFAULT-LABEL: @test_norm4df( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_norm4df( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr 
#[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_norm4df( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_norm4df( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_norm4df( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_norm4df( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] 
// APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_norm4df( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_norm4df( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm4df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_norm4df( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_norm4df(float x, float y, float z, float w) { return norm4df(x, y, z, w); } -// DEFAULT-LABEL: @test_norm4d( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double 
@test_norm4d( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_norm4d( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_norm4d( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_norm4d( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_norm4d( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: 
[[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_norm4d( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_norm4d( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm4d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_norm4d( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_norm4d(double x, double y, double z, double w) { return norm4d(x, y, z, w); } -// DEFAULT-LABEL: @test_normcdff( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_normcdff( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_normcdff( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_normcdff( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_normcdff( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_normcdff( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_normcdff( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_normcdff( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = 
tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdff( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_normcdff( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdf_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_normcdff(float x) { return normcdff(x); } -// DEFAULT-LABEL: @test_normcdf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_normcdf( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_normcdf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_normcdf( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // 
-// APPROX-LABEL: @test_normcdf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_normcdf( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_normcdf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_normcdf( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_normcdf( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdf_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_normcdf(double x) { return normcdf(x); } -// DEFAULT-LABEL: @test_normcdfinvf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_normcdfinvf( +// DEFAULT-SAME: float noundef 
[[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_normcdfinvf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_normcdfinvf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_normcdfinvf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_normcdfinvf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_normcdfinvf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_normcdfinvf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdfinvf( 
-// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_normcdfinvf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdfinv_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_normcdfinvf(float x) { return normcdfinvf(x); } -// DEFAULT-LABEL: @test_normcdfinv( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_normcdfinv( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_normcdfinv( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_normcdfinv( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_normcdfinv( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef 
double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_normcdfinv( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_normcdfinv( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_normcdfinv( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdfinv( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_normcdfinv( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdfinv_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_normcdfinv(double x) { return normcdfinv(x); } -// DEFAULT-LABEL: @test_normf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// DEFAULT: while.body.i: -// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] 
-// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// DEFAULT-LABEL: define dso_local float @test_normf( +// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// DEFAULT-NEXT: [[ENTRY:.*]]: +// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// DEFAULT: [[WHILE_BODY_I]]: +// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // DEFAULT-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] -// DEFAULT: _ZL5normfiPKf.exit.loopexit: +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] +// DEFAULT: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]: // DEFAULT-NEXT: [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]]) 
-// DEFAULT-NEXT: br label [[_ZL5NORMFIPKF_EXIT]] -// DEFAULT: _ZL5normfiPKf.exit: -// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] +// DEFAULT-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]] +// DEFAULT: [[_ZL5NORMFIPKF_EXIT]]: +// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] // DEFAULT-NEXT: ret float [[__R_0_I_LCSSA]] // -// FINITEONLY-LABEL: @test_normf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// FINITEONLY: while.body.i: -// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_normf( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// FINITEONLY-NEXT: [[ENTRY:.*]]: +// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// FINITEONLY: [[WHILE_BODY_I]]: +// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// FINITEONLY-NEXT: [[TMP0:%.*]] = 
load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]] // FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] -// FINITEONLY: _ZL5normfiPKf.exit.loopexit: +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] +// FINITEONLY: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]: // FINITEONLY-NEXT: [[TMP1:%.*]] = tail call nnan ninf contract float @llvm.sqrt.f32(float [[ADD_I]]) -// FINITEONLY-NEXT: br label [[_ZL5NORMFIPKF_EXIT]] -// FINITEONLY: _ZL5normfiPKf.exit: -// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] +// FINITEONLY-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]] +// FINITEONLY: [[_ZL5NORMFIPKF_EXIT]]: +// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] // FINITEONLY-NEXT: ret float [[__R_0_I_LCSSA]] // -// APPROX-LABEL: @test_normf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// APPROX: while.body.i: -// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// APPROX-NEXT: 
[[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// APPROX-LABEL: define dso_local float @test_normf( +// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// APPROX-NEXT: [[ENTRY:.*]]: +// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// APPROX: [[WHILE_BODY_I]]: +// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] +// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] // APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] -// APPROX: _ZL5normfiPKf.exit.loopexit: +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] +// APPROX: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]: // APPROX-NEXT: [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]]) -// APPROX-NEXT: br label [[_ZL5NORMFIPKF_EXIT]] -// APPROX: _ZL5normfiPKf.exit: -// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] 
], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] +// APPROX-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]] +// APPROX: [[_ZL5NORMFIPKF_EXIT]]: +// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] // APPROX-NEXT: ret float [[__R_0_I_LCSSA]] // -// NCRDIV-LABEL: @test_normf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// NCRDIV: while.body.i: -// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// NCRDIV-LABEL: define dso_local float @test_normf( +// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// NCRDIV-NEXT: [[ENTRY:.*]]: +// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// NCRDIV: [[WHILE_BODY_I]]: +// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // NCRDIV-NEXT: [[ADD_I]] = 
fadd contract float [[__R_0_I4]], [[MUL_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] -// NCRDIV: _ZL5normfiPKf.exit.loopexit: +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] +// NCRDIV: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]: // NCRDIV-NEXT: [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]]) -// NCRDIV-NEXT: br label [[_ZL5NORMFIPKF_EXIT]] -// NCRDIV: _ZL5normfiPKf.exit: -// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] +// NCRDIV-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]] +// NCRDIV: [[_ZL5NORMFIPKF_EXIT]]: +// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] // NCRDIV-NEXT: ret float [[__R_0_I_LCSSA]] // -// AMDGCNSPIRV-LABEL: @test_normf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-LABEL: define spir_func float @test_normf( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { +// AMDGCNSPIRV-NEXT: 
[[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5NORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4 // AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] -// AMDGCNSPIRV: _ZL5normfiPKf.exit.loopexit: +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] +// AMDGCNSPIRV: [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]]: // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract addrspace(4) float @llvm.sqrt.f32(float [[ADD_I]]) -// AMDGCNSPIRV-NEXT: br label [[_ZL5NORMFIPKF_EXIT]] -// AMDGCNSPIRV: _ZL5normfiPKf.exit: -// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] +// AMDGCNSPIRV-NEXT: br label %[[_ZL5NORMFIPKF_EXIT]] +// 
AMDGCNSPIRV: [[_ZL5NORMFIPKF_EXIT]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ] // AMDGCNSPIRV-NEXT: ret float [[__R_0_I_LCSSA]] // extern "C" __device__ float test_normf(int x, const float *y) { return normf(x, y); } -// DEFAULT-LABEL: @test_norm( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// DEFAULT: while.body.i: -// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// DEFAULT-LABEL: define dso_local double @test_norm( +// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// DEFAULT-NEXT: [[ENTRY:.*]]: +// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// DEFAULT: [[WHILE_BODY_I]]: +// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // 
DEFAULT-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] -// DEFAULT: _ZL4normiPKd.exit.loopexit: +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] +// DEFAULT: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]: // DEFAULT-NEXT: [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]]) -// DEFAULT-NEXT: br label [[_ZL4NORMIPKD_EXIT]] -// DEFAULT: _ZL4normiPKd.exit: -// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] +// DEFAULT-NEXT: br label %[[_ZL4NORMIPKD_EXIT]] +// DEFAULT: [[_ZL4NORMIPKD_EXIT]]: +// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] // DEFAULT-NEXT: ret double [[__R_0_I_LCSSA]] // -// FINITEONLY-LABEL: @test_norm( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// FINITEONLY: while.body.i: -// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_norm( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// 
FINITEONLY-NEXT: [[ENTRY:.*]]: +// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// FINITEONLY: [[WHILE_BODY_I]]: +// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]] // FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] -// FINITEONLY: _ZL4normiPKd.exit.loopexit: +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] +// FINITEONLY: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]: // FINITEONLY-NEXT: [[TMP1:%.*]] = tail call nnan ninf contract double @llvm.sqrt.f64(double [[ADD_I]]) -// FINITEONLY-NEXT: br label [[_ZL4NORMIPKD_EXIT]] -// FINITEONLY: _ZL4normiPKd.exit: -// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] +// FINITEONLY-NEXT: br label %[[_ZL4NORMIPKD_EXIT]] +// FINITEONLY: [[_ZL4NORMIPKD_EXIT]]: +// 
FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] // FINITEONLY-NEXT: ret double [[__R_0_I_LCSSA]] // -// APPROX-LABEL: @test_norm( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// APPROX: while.body.i: -// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// APPROX-LABEL: define dso_local double @test_norm( +// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// APPROX-NEXT: [[ENTRY:.*]]: +// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// APPROX: [[WHILE_BODY_I]]: +// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] +// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr 
[[__A_ADDR_0_I3]], i64 8 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] -// APPROX: _ZL4normiPKd.exit.loopexit: +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] +// APPROX: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]: // APPROX-NEXT: [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]]) -// APPROX-NEXT: br label [[_ZL4NORMIPKD_EXIT]] -// APPROX: _ZL4normiPKd.exit: -// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] +// APPROX-NEXT: br label %[[_ZL4NORMIPKD_EXIT]] +// APPROX: [[_ZL4NORMIPKD_EXIT]]: +// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] // APPROX-NEXT: ret double [[__R_0_I_LCSSA]] // -// NCRDIV-LABEL: @test_norm( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// NCRDIV: while.body.i: -// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// NCRDIV-LABEL: define dso_local double @test_norm( +// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +// NCRDIV-NEXT: [[ENTRY:.*]]: +// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// NCRDIV: [[WHILE_BODY_I]]: +// 
NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // NCRDIV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] -// NCRDIV: _ZL4normiPKd.exit.loopexit: +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] +// NCRDIV: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]: // NCRDIV-NEXT: [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]]) -// NCRDIV-NEXT: br label [[_ZL4NORMIPKD_EXIT]] -// NCRDIV: _ZL4normiPKd.exit: -// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] +// NCRDIV-NEXT: br label %[[_ZL4NORMIPKD_EXIT]] +// NCRDIV: [[_ZL4NORMIPKD_EXIT]]: +// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] // NCRDIV-NEXT: ret double [[__R_0_I_LCSSA]] // -// AMDGCNSPIRV-LABEL: @test_norm( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], 
label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-LABEL: define spir_func double @test_norm( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL4NORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8 // AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label 
[[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] -// AMDGCNSPIRV: _ZL4normiPKd.exit.loopexit: +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL4NORMIPKD_EXIT_LOOPEXIT:.*]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] +// AMDGCNSPIRV: [[_ZL4NORMIPKD_EXIT_LOOPEXIT]]: // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract addrspace(4) double @llvm.sqrt.f64(double [[ADD_I]]) -// AMDGCNSPIRV-NEXT: br label [[_ZL4NORMIPKD_EXIT]] -// AMDGCNSPIRV: _ZL4normiPKd.exit: -// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] +// AMDGCNSPIRV-NEXT: br label %[[_ZL4NORMIPKD_EXIT]] +// AMDGCNSPIRV: [[_ZL4NORMIPKD_EXIT]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ] // AMDGCNSPIRV-NEXT: ret double [[__R_0_I_LCSSA]] // extern "C" __device__ double test_norm(int x, const double *y) { return norm(x, y); } -// DEFAULT-LABEL: @test_powf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_powf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_powf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_powf( +// FINITEONLY-SAME: float noundef 
nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_powf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_powf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_powf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_powf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_powf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_powf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// 
AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_powf(float x, float y) { return powf(x, y); } -// DEFAULT-LABEL: @test_pow( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_pow( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_pow( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_pow( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_pow( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_pow( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_pow( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_pow( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_pow( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_pow( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pow_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_pow(double x, double y) { return pow(x, y); } -// DEFAULT-LABEL: @test_powif( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_powif( +// DEFAULT-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_powif( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_powif( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X]], i32 noundef [[Y]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_powif( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_powif( +// APPROX-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_powif( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_powif( +// NCRDIV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] // NCRDIV-NEXT: 
ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_powif( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_powif( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pown_f32(float noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_powif(float x, int y) { return powif(x, y); } -// DEFAULT-LABEL: @test_powi( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_powi( +// DEFAULT-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_powi( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_powi( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) 
[[X]], i32 noundef [[Y]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_powi( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_powi( +// APPROX-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_powi( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_powi( +// NCRDIV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_powi( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_powi( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pown_f64(double noundef [[X]], i32 noundef [[Y]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_powi(double x, int y) { return powi(x, y); } -// DEFAULT-LABEL: 
@test_rcbrtf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_rcbrtf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_rcbrtf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rcbrtf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_rcbrtf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_rcbrtf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_rcbrtf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_rcbrtf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: 
[[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rcbrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rcbrtf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rcbrt_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_rcbrtf(float x) { return rcbrtf(x); } -// DEFAULT-LABEL: @test_rcbrt( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_rcbrt( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_rcbrt( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rcbrt( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // 
FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_rcbrt( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_rcbrt( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_rcbrt( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_rcbrt( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rcbrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rcbrt( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rcbrt_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_rcbrt(double x) { return rcbrt(x); } -// DEFAULT-LABEL: @test_remainderf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef 
float @test_remainderf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_remainderf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_remainderf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_remainderf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_remainderf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_remainderf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef 
float @test_remainderf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remainderf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_remainderf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_remainder_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_remainderf(float x, float y) { return remainderf(x, y); } -// DEFAULT-LABEL: @test_remainder( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_remainder( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_remainder( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) 
[[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_remainder( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_remainder( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_remainder( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_remainder( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_remainder( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remainder( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) 
#[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_remainder( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_remainder_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_remainder(double x, double y) { return remainder(x, y); } -// DEFAULT-LABEL: @test_remquof( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef float @test_remquof( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]] -// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] +// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_remquof( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef 
nofpclass(nan inf) float @test_remquof( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]] -// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] +// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] // FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_remquof( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef float @test_remquof( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float 
@__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]] -// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] +// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] // APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_remquof( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef float @test_remquof( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA13]] -// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA13]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA13]] +// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA13]] // NCRDIV-NEXT: call void 
@llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remquof( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_remquof( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] -// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X]], float noundef [[Y]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA13]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA13]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // @@ -5414,54 +6068,59 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) { return remquof(x, y, z); } -// DEFAULT-LABEL: @test_remquo( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local noundef double @test_remquo( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef 
[[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]] -// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] +// DEFAULT-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_remquo( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_remquo( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// 
FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]] -// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] +// FINITEONLY-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] // FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_remquo( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local noundef double @test_remquo( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]] -// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA12]] +// APPROX-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA12]] // APPROX-NEXT: call void 
@llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_remquo( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local noundef double @test_remquo( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5) // NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA13]] -// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA13]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[INT_TBAA13]] +// NCRDIV-NEXT: store i32 [[TMP0]], ptr [[Z]], align 4, !tbaa [[INT_TBAA13]] // NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remquo( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_remquo( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) 
void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] -// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X]], double noundef [[Y]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[INT_TBAA13]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[INT_TBAA13]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // @@ -5469,219 +6128,244 @@ extern "C" __device__ double test_remquo(double x, double y, int* z) { return remquo(x, y, z); } -// DEFAULT-LABEL: @test_rhypotf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_rhypotf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_rhypotf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef 
nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rhypotf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_rhypotf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_rhypotf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_rhypotf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_rhypotf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rhypotf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define 
spir_func noundef float @test_rhypotf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rhypot_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_rhypotf(float x, float y) { return rhypotf(x, y); } -// DEFAULT-LABEL: @test_rhypot( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_rhypot( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_rhypot( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rhypot( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_rhypot( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_rhypot( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_rhypot( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_rhypot( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rhypot( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rhypot( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rhypot_f64(double noundef [[X]], double noundef [[Y]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_rhypot(double x, double y) { return rhypot(x, y); } -// DEFAULT-LABEL: @test_rintf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float 
[[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_rintf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_rintf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.rint.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rintf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.rint.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_rintf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_rintf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_rintf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_rintf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_rintf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: 
define spir_func noundef float @test_rintf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.rint.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_rintf(float x) { return rintf(x); } -// DEFAULT-LABEL: @test_rint( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_rint( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_rint( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.rint.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rint( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.rint.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_rint( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_rint( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_rint( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call 
contract noundef double @llvm.rint.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_rint( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_rint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rint( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.rint.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_rint(double x) { return rint(x); } -// DEFAULT-LABEL: @test_rnormf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// DEFAULT: while.body.i: -// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// DEFAULT-LABEL: define dso_local noundef float @test_rnormf( +// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*]]: +// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// DEFAULT: [[WHILE_BODY_I]]: +// DEFAULT-NEXT: 
[[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // DEFAULT-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] -// DEFAULT: _ZL6rnormfiPKf.exit: -// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] +// DEFAULT: [[_ZL6RNORMFIPKF_EXIT]]: +// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_rnormf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// FINITEONLY: while.body.i: -// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, 
[[ENTRY:%.*]] ] -// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnormf( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*]]: +// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// FINITEONLY: [[WHILE_BODY_I]]: +// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]] // FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] -// FINITEONLY: _ZL6rnormfiPKf.exit: -// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label 
%[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] +// FINITEONLY: [[_ZL6RNORMFIPKF_EXIT]]: +// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_rnormf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// APPROX: while.body.i: -// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// APPROX-LABEL: define dso_local noundef float @test_rnormf( +// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*]]: +// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// APPROX: [[WHILE_BODY_I]]: +// APPROX-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] +// APPROX-NEXT: [[TMP0:%.*]] = 
load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] // APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] -// APPROX: _ZL6rnormfiPKf.exit: -// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] +// APPROX: [[_ZL6RNORMFIPKF_EXIT]]: +// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_rnormf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// NCRDIV: while.body.i: -// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// NCRDIV-LABEL: define dso_local noundef float @test_rnormf( +// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*]]: +// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], 
label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// NCRDIV: [[WHILE_BODY_I]]: +// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // NCRDIV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] -// NCRDIV: _ZL6rnormfiPKf.exit: -// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] +// NCRDIV: [[_ZL6RNORMFIPKF_EXIT]]: +// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnormf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// 
AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnormf( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL6RNORMFIPKF_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[FLOAT_TBAA17]] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4 // AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] -// AMDGCNSPIRV: _ZL6rnormfiPKf.exit: 
-// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL6RNORMFIPKF_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] +// AMDGCNSPIRV: [[_ZL6RNORMFIPKF_EXIT]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // @@ -5689,103 +6373,108 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { return rnormf(x, y); } -// DEFAULT-LABEL: @test_rnorm( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// DEFAULT: while.body.i: -// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// DEFAULT-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// DEFAULT-LABEL: define dso_local noundef double @test_rnorm( +// DEFAULT-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*]]: +// DEFAULT-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// DEFAULT: [[WHILE_BODY_I]]: +// DEFAULT-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// DEFAULT-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// DEFAULT-NEXT: 
[[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // DEFAULT-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // DEFAULT-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] -// DEFAULT: _ZL5rnormiPKd.exit: -// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] +// DEFAULT: [[_ZL5RNORMIPKD_EXIT]]: +// DEFAULT-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_rnorm( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// FINITEONLY: while.body.i: -// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ 
[[X]], [[ENTRY]] ] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*]]: +// FINITEONLY-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// FINITEONLY: [[WHILE_BODY_I]]: +// FINITEONLY-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// FINITEONLY-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // FINITEONLY-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]] // FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] -// FINITEONLY: _ZL5rnormiPKd.exit: -// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] +// FINITEONLY: [[_ZL5RNORMIPKD_EXIT]]: +// FINITEONLY-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], 
%[[WHILE_BODY_I]] ] // FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_rnorm( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// APPROX: while.body.i: -// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// APPROX-LABEL: define dso_local noundef double @test_rnorm( +// APPROX-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*]]: +// APPROX-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// APPROX: [[WHILE_BODY_I]]: +// APPROX-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// APPROX-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// APPROX-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // APPROX-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] +// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // 
APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] -// APPROX: _ZL5rnormiPKd.exit: -// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] +// APPROX: [[_ZL5RNORMIPKD_EXIT]]: +// APPROX-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_rnorm( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// NCRDIV: while.body.i: -// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// NCRDIV-LABEL: define dso_local noundef double @test_rnorm( +// NCRDIV-SAME: i32 noundef [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*]]: +// NCRDIV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// NCRDIV: [[WHILE_BODY_I]]: +// NCRDIV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// 
NCRDIV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// NCRDIV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // NCRDIV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // NCRDIV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // NCRDIV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // NCRDIV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] -// NCRDIV: _ZL5rnormiPKd.exit: -// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// NCRDIV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label %[[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] +// NCRDIV: [[_ZL5RNORMIPKD_EXIT]]: +// NCRDIV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], 
[[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], ptr addrspace(4) noundef readonly captures(none) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*]]: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label %[[_ZL5RNORMIPKD_EXIT:.*]], label %[[WHILE_BODY_I:.*]] +// AMDGCNSPIRV: [[WHILE_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], %[[WHILE_BODY_I]] ], [ 0.000000e+00, %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], %[[WHILE_BODY_I]] ], [ [[Y]], %[[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], %[[WHILE_BODY_I]] ], [ [[X]], %[[ENTRY]] ] // AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[DOUBLE_TBAA19]] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] // AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8 // AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] -// AMDGCNSPIRV: _ZL5rnormiPKd.exit: -// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[_ZL5RNORMIPKD_EXIT]], label 
%[[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] +// AMDGCNSPIRV: [[_ZL5RNORMIPKD_EXIT]]: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_I]], %[[WHILE_BODY_I]] ] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // @@ -5793,383 +6482,466 @@ extern "C" __device__ double test_rnorm(int x, const double* y) { return rnorm(x, y); } -// DEFAULT-LABEL: @test_rnorm3df( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_rnorm3df( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_rnorm3df( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnorm3df( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X]], float noundef 
nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_rnorm3df( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_rnorm3df( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_rnorm3df( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_rnorm3df( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm3df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnorm3df( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract 
spir_func noundef addrspace(4) float @__ocml_rlen3_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_rnorm3df(float x, float y, float z) { return rnorm3df(x, y, z); } -// DEFAULT-LABEL: @test_rnorm3d( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_rnorm3d( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_rnorm3d( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm3d( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_rnorm3d( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract 
noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_rnorm3d( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_rnorm3d( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_rnorm3d( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm3d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm3d( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen3_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ 
double test_rnorm3d(double x, double y, double z) { return rnorm3d(x, y, z); } -// DEFAULT-LABEL: @test_rnorm4df( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test_rnorm4df( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_rnorm4df( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rnorm4df( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]], float noundef nofpclass(nan inf) [[Z]], float noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_rnorm4df( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test_rnorm4df( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_rnorm4df( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test_rnorm4df( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm4df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rnorm4df( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float 
@__ocml_rlen4_f32(float noundef [[X]], float noundef [[Y]], float noundef [[Z]], float noundef [[W]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_rnorm4df(float x, float y, float z, float w) { return rnorm4df(x, y, z, w); } -// DEFAULT-LABEL: @test_rnorm4d( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef double @test_rnorm4d( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_rnorm4d( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rnorm4d( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) [[Y]], double noundef nofpclass(nan inf) [[Z]], double 
noundef nofpclass(nan inf) [[W]]) #[[ATTR14]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_rnorm4d( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef double @test_rnorm4d( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_rnorm4d( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef double @test_rnorm4d( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR14]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm4d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rnorm4d( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef 
[[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen4_f64(double noundef [[X]], double noundef [[Y]], double noundef [[Z]], double noundef [[W]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_rnorm4d(double x, double y, double z, double w) { return rnorm4d(x, y, z, w); } -// DEFAULT-LABEL: @test_roundf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_roundf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_roundf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.round.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_roundf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.round.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_roundf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_roundf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X]]) // APPROX-NEXT: ret 
float [[TMP0]] // -// NCRDIV-LABEL: @test_roundf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_roundf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_roundf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.round.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_roundf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.round.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_roundf(float x) { return roundf(x); } -// DEFAULT-LABEL: @test_round( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_round( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_round( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.round.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_round( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail 
call nnan ninf contract noundef double @llvm.round.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_round( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_round( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_round( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_round( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_round( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.round.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_round( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.round.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_round(double x) { return round(x); } -// DEFAULT-LABEL: @test_rsqrtf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_rsqrtf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: 
[[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_rsqrtf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_rsqrtf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_rsqrtf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_rsqrtf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_rsqrtf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_rsqrtf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rsqrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef 
addrspace(4) float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_rsqrtf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_rsqrtf(float x) { return rsqrtf(x); } -// DEFAULT-LABEL: @test_rsqrt( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_rsqrt( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_rsqrt( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_rsqrt( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_rsqrt( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_rsqrt( +// APPROX-SAME: double noundef 
[[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_rsqrt( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_rsqrt( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rsqrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_rsqrt( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_rsqrt(double x) { return rsqrt(x); } -// DEFAULT-LABEL: @test_scalblnf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// DEFAULT-LABEL: define dso_local noundef float @test_scalblnf( +// DEFAULT-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// DEFAULT-NEXT: [[TMP0:%.*]] 
= tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]]) +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_scalblnf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_scalblnf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X:%.*]], i32 [[CONV_I]]) +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X]], i32 [[CONV_I]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_scalblnf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// APPROX-LABEL: define dso_local noundef float @test_scalblnf( +// APPROX-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]]) +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]]) // APPROX-NEXT: ret float [[TMP0]] // -// 
NCRDIV-LABEL: @test_scalblnf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// NCRDIV-LABEL: define dso_local noundef float @test_scalblnf( +// NCRDIV-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // NCRDIV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]]) +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_scalblnf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_scalblnf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]]) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X]], i32 [[CONV_I]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_scalblnf(float x, long int y) { return scalblnf(x, y); } -// DEFAULT-LABEL: @test_scalbln( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// 
DEFAULT-LABEL: define dso_local noundef double @test_scalbln( +// DEFAULT-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]]) +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_scalbln( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_scalbln( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X:%.*]], i32 [[CONV_I]]) +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X]], i32 [[CONV_I]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_scalbln( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// APPROX-LABEL: define dso_local noundef double @test_scalbln( +// APPROX-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: 
[[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]]) +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_scalbln( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// NCRDIV-LABEL: define dso_local noundef double @test_scalbln( +// NCRDIV-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // NCRDIV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]]) +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_scalbln( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_scalbln( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y]], i64 -2147483648) // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 
[[CONV_I]]) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X]], i32 [[CONV_I]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_scalbln(double x, long int y) { return scalbln(x, y); } -// DEFAULT-LABEL: @test_scalbnf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_scalbnf( +// DEFAULT-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_scalbnf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_scalbnf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float nofpclass(nan inf) [[X]], i32 [[Y]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_scalbnf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_scalbnf( +// APPROX-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_scalbnf( -// 
NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_scalbnf( +// NCRDIV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_scalbnf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_scalbnf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_scalbnf(float x, int y) { return scalbnf(x, y); } -// DEFAULT-LABEL: @test_scalbn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_scalbn( +// DEFAULT-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_scalbn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X:%.*]], i32 [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_scalbn( +// 
FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double nofpclass(nan inf) [[X]], i32 [[Y]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_scalbn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_scalbn( +// APPROX-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_scalbn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_scalbn( +// NCRDIV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_scalbn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_scalbn( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X]], i32 [[Y]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ 
double test_scalbn(double x, int y) { return scalbn(x, y); } -// CHECK-LABEL: @test___signbitf( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X:%.*]] to i32 -// CHECK-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 -// CHECK-NEXT: ret i32 [[DOTLOBIT]] -// -// AMDGCNSPIRV-LABEL: @test___signbitf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast float [[X:%.*]] to i32 +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32 +// DEFAULT-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 +// DEFAULT-NEXT: ret i32 [[DOTLOBIT]] +// +// FINITEONLY-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32 +// FINITEONLY-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 +// FINITEONLY-NEXT: ret i32 [[DOTLOBIT]] +// +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32 +// APPROX-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 +// APPROX-NEXT: ret i32 [[DOTLOBIT]] +// +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___signbitf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32 +// NCRDIV-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 +// NCRDIV-NEXT: ret i32 [[DOTLOBIT]] +// +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___signbitf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr 
addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32 // AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 // AMDGCNSPIRV-NEXT: ret i32 [[DOTLOBIT]] // @@ -6177,16 +6949,42 @@ extern "C" __device__ BOOL_TYPE test___signbitf(float x) { return __signbitf(x); } -// CHECK-LABEL: @test___signbit( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[X:%.*]] to i64 -// CHECK-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 -// CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64 +// DEFAULT-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 +// DEFAULT-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 +// DEFAULT-NEXT: ret i32 [[CONV]] +// +// FINITEONLY-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64 +// FINITEONLY-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 +// FINITEONLY-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 +// FINITEONLY-NEXT: ret i32 [[CONV]] +// +// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64 +// APPROX-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 +// APPROX-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 +// APPROX-NEXT: ret i32 [[CONV]] +// +// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___signbit( +// NCRDIV-SAME: double noundef [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64 +// NCRDIV-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 +// NCRDIV-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 +// NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___signbit( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast double [[X:%.*]] to i64 +// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___signbit( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64 // AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 // AMDGCNSPIRV-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 // AMDGCNSPIRV-NEXT: ret i32 [[CONV]] @@ -6195,59 +6993,64 @@ extern "C" __device__ BOOL_TYPE test___signbit(double x) { return __signbit(x); } -// DEFAULT-LABEL: @test_sincosf( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local void @test_sincosf( +// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]] -// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float 
noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] +// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret void // -// FINITEONLY-LABEL: @test_sincosf( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local void @test_sincosf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]] -// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] +// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: 
call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // FINITEONLY-NEXT: ret void // -// APPROX-LABEL: @test_sincosf( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local void @test_sincosf( +// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] -// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]] -// APPROX-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] +// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] +// APPROX-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret void // -// NCRDIV-LABEL: @test_sincosf( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local void @test_sincosf( +// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // 
NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17]] -// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17]] +// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincosf( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func void @test_sincosf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincos_f32(float noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[FLOAT_TBAA17]] +// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void // @@ -6255,59 +7058,64 @@ extern "C" __device__ void test_sincosf(float x, float *y, float *z) { sincosf(x, y, z); } -// DEFAULT-LABEL: @test_sincos( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local void @test_sincos( +// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]] -// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) 
noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] +// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret void // -// FINITEONLY-LABEL: @test_sincos( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local void @test_sincos( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]] -// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] +// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] // FINITEONLY-NEXT: call void 
@llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // FINITEONLY-NEXT: ret void // -// APPROX-LABEL: @test_sincos( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local void @test_sincos( +// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] -// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]] -// APPROX-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] +// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] +// APPROX-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret void // -// NCRDIV-LABEL: @test_sincos( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local void @test_sincos( +// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // 
NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19]] -// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA19]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19]] +// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincos( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func void @test_sincos( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] 
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincos_f64(double noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[DOUBLE_TBAA19]] +// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void // @@ -6315,59 +7123,64 @@ extern "C" __device__ void test_sincos(double x, double *y, double *z) { sincos(x, y, z); } -// DEFAULT-LABEL: @test_sincospif( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local void @test_sincospif( +// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]] -// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], 
ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] +// DEFAULT-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret void // -// FINITEONLY-LABEL: @test_sincospif( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local void @test_sincospif( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]] -// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] +// FINITEONLY-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: call void 
@llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // FINITEONLY-NEXT: ret void // -// APPROX-LABEL: @test_sincospif( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local void @test_sincospif( +// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] -// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]] -// APPROX-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] +// APPROX-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA16]] +// APPROX-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret void // -// NCRDIV-LABEL: @test_sincospif( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local void @test_sincospif( +// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5) // 
NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17]] -// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[FLOAT_TBAA17]] +// NCRDIV-NEXT: store float [[TMP0]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincospif( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func void @test_sincospif( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincospi_f32(float noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[FLOAT_TBAA17]] +// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void // @@ -6375,59 +7188,64 @@ extern "C" __device__ void test_sincospif(float x, float *y, float *z) { sincospif(x, y, z); } -// DEFAULT-LABEL: @test_sincospi( -// DEFAULT-NEXT: entry: +// DEFAULT-LABEL: define dso_local void @test_sincospi( +// DEFAULT-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // DEFAULT-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] -// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]] -// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr 
addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// DEFAULT-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] +// DEFAULT-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] // DEFAULT-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // DEFAULT-NEXT: ret void // -// FINITEONLY-LABEL: @test_sincospi( -// FINITEONLY-NEXT: entry: +// FINITEONLY-LABEL: define dso_local void @test_sincospi( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // FINITEONLY-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] -// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]] -// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// FINITEONLY-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] +// FINITEONLY-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] // 
FINITEONLY-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // FINITEONLY-NEXT: ret void // -// APPROX-LABEL: @test_sincospi( -// APPROX-NEXT: entry: +// APPROX-LABEL: define dso_local void @test_sincospi( +// APPROX-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5) // APPROX-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// APPROX-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]] -// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]] -// APPROX-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]] +// APPROX-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// APPROX-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA18]] +// APPROX-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA18]] +// APPROX-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA18]] // APPROX-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // APPROX-NEXT: ret void // -// NCRDIV-LABEL: @test_sincospi( -// NCRDIV-NEXT: entry: +// NCRDIV-LABEL: define dso_local void @test_sincospi( +// NCRDIV-SAME: double noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[__TMP_I:%.*]] = alloca 
double, align 8, addrspace(5) // NCRDIV-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] -// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] -// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19]] -// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA19]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]] +// NCRDIV-NEXT: store double [[CALL_I]], ptr [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] +// NCRDIV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[DOUBLE_TBAA19]] +// NCRDIV-NEXT: store double [[TMP0]], ptr [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]] // NCRDIV-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]] // NCRDIV-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincospi( -// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-LABEL: define spir_func void @test_sincospi( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 8)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 // AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr 
addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincospi_f64(double noundef [[X]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y]], align 8, !tbaa [[DOUBLE_TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[DOUBLE_TBAA19]] +// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z]], align 8, !tbaa [[DOUBLE_TBAA19]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void // @@ -6435,549 +7253,640 @@ extern "C" __device__ void test_sincospi(double x, double *y, double *z) { sincospi(x, y, z); } -// DEFAULT-LABEL: @test_sinf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_sinf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_sinf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sinf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: 
[[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_sinf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_sinf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I1]] // -// NCRDIV-LABEL: @test_sinf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_sinf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_sinf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sinf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sin_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_sinf(float x) { return sinf(x); } -// DEFAULT-LABEL: @test_sin( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_sin( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_sin( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sin( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_sin( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_sin( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_sin( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_sin( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef 
[[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_sin( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_sin( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sin_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_sin(double x) { return sin(x); } -// DEFAULT-LABEL: @test_sinpif( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_sinpif( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_sinpif( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sinpif( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_sinpif( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = 
tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_sinpif( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_sinpif( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_sinpif( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_sinpif( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sinpif( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sinpi_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_sinpif(float x) { return sinpif(x); } -// DEFAULT-LABEL: @test_sinpi( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_sinpi( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract 
noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_sinpi( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sinpi( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_sinpi( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_sinpi( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_sinpi( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_sinpi( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_sinpi( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sinpi_f64(double noundef 
[[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_sinpi( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sinpi_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_sinpi(double x) { return sinpi(x); } -// DEFAULT-LABEL: @test_sqrtf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_sqrtf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_sqrtf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_sqrtf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_sqrtf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_sqrtf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: 
@test_sqrtf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]), !fpmath [[META25:![0-9]+]] +// NCRDIV-LABEL: define dso_local noundef float @test_sqrtf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X]]), !fpmath [[META25:![0-9]+]] // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_sqrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_sqrtf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_sqrtf(float x) { return sqrtf(x); } -// DEFAULT-LABEL: @test_sqrt( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_sqrt( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_sqrt( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_sqrt( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = 
tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_sqrt( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_sqrt( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_sqrt( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_sqrt( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_sqrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_sqrt( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_sqrt(double x) { return sqrt(x); } -// DEFAULT-LABEL: @test_tanf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_tanf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// 
DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_tanf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tanf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_tanf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_tanf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_tanf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_tanf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tan_f32(float noundef 
[[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tanf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tan_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_tanf(float x) { return tanf(x); } -// DEFAULT-LABEL: @test_tan( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_tan( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_tan( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tan( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_tan( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_tan( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// 
APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_tan( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_tan( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tan( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tan_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_tan(double x) { return tan(x); } -// DEFAULT-LABEL: @test_tanhf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test_tanhf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_tanhf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float 
@__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tanhf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_tanhf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test_tanhf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_tanhf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test_tanhf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tanhf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tanhf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef 
addrspace(4) float @__ocml_tanh_f32(float noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_tanhf(float x) { return tanhf(x); } -// DEFAULT-LABEL: @test_tanh( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef double @test_tanh( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_tanh( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tanh( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR15]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_tanh( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef double @test_tanh( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_tanh( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double 
@__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef double @test_tanh( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR15]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tanh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tanh( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tanh_f64(double noundef [[X]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_tanh(double x) { return tanh(x); } -// DEFAULT-LABEL: @test_tgammaf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_tgammaf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_tgammaf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_tgammaf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// 
FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_tgammaf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_tgammaf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_tgammaf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_tgammaf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tgammaf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_tgammaf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tgamma_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_tgammaf(float x) { return tgammaf(x); } -// DEFAULT-LABEL: @test_tgamma( -// DEFAULT-NEXT: entry: 
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_tgamma( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_tgamma( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_tgamma( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_tgamma( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_tgamma( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_tgamma( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_tgamma( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: 
[[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tgamma( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_tgamma( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tgamma_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_tgamma(double x) { return tgamma(x); } -// DEFAULT-LABEL: @test_truncf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_truncf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_truncf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.trunc.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_truncf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.trunc.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_truncf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail 
call contract noundef float @llvm.trunc.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_truncf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_truncf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_truncf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_truncf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.trunc.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_truncf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.trunc.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_truncf(float x) { return truncf(x); } -// DEFAULT-LABEL: @test_trunc( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_trunc( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_trunc( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan 
ninf contract noundef double @llvm.trunc.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_trunc( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.trunc.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_trunc( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_trunc( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_trunc( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_trunc( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_trunc( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.trunc.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_trunc( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.trunc.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_trunc(double x) { return trunc(x); } 
-// DEFAULT-LABEL: @test_y0f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_y0f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_y0f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_y0f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_y0f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_y0f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_y0f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_y0f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// 
NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y0f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_y0f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_y0f(float x) { return y0f(x); } -// DEFAULT-LABEL: @test_y0( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_y0( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_y0( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_y0( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: 
@test_y0( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_y0( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_y0( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_y0( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y0( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_y0( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_y0(double x) { return y0(x); } -// DEFAULT-LABEL: @test_y1f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test_y1f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test_y1f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_y1f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test_y1f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test_y1f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test_y1f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test_y1f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y1f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// 
AMDGCNSPIRV-LABEL: define spir_func noundef float @test_y1f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test_y1f(float x) { return y1f(x); } -// DEFAULT-LABEL: @test_y1( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef double @test_y1( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret double [[CALL_I]] // -// FINITEONLY-LABEL: @test_y1( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_y1( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret double [[CALL_I]] // -// APPROX-LABEL: @test_y1( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef double @test_y1( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret double [[CALL_I]] // -// NCRDIV-LABEL: @test_y1( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef double @test_y1( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_y1( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] // extern "C" __device__ double test_y1(double x) { return y1(x); } -// DEFAULT-LABEL: @test_ynf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// DEFAULT-LABEL: define dso_local float @test_ynf( +// DEFAULT-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] -// DEFAULT: if.then.i: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) 
#[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]] -// DEFAULT: if.then2.i: +// DEFAULT: [[IF_THEN_I]]: +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] +// DEFAULT: [[IF_THEN2_I]]: // DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL3YNFIF_EXIT]] -// DEFAULT: if.end4.i: +// DEFAULT-NEXT: br label %[[_ZL3YNFIF_EXIT]] +// DEFAULT: [[IF_END4_I]]: // DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] // DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]] -// DEFAULT: for.body.i: -// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] +// DEFAULT: [[FOR_BODY_I]]: +// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] @@ -6985,32 +7894,33 
@@ extern "C" __device__ double test_y1(double x) { // DEFAULT-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] -// DEFAULT: _ZL3ynfif.exit: -// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] +// DEFAULT: [[_ZL3YNFIF_EXIT]]: +// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // DEFAULT-NEXT: ret float [[RETVAL_0_I]] // -// FINITEONLY-LABEL: @test_ynf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_ynf( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] -// FINITEONLY: if.then.i: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]] -// FINITEONLY-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]] -// FINITEONLY: if.then2.i: +// FINITEONLY: [[IF_THEN_I]]: +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail 
call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] +// FINITEONLY: [[IF_THEN2_I]]: // FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// FINITEONLY-NEXT: br label [[_ZL3YNFIF_EXIT]] -// FINITEONLY: if.end4.i: +// FINITEONLY-NEXT: br label %[[_ZL3YNFIF_EXIT]] +// FINITEONLY: [[IF_END4_I]]: // FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]] -// FINITEONLY: for.body.i: -// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] +// FINITEONLY: [[FOR_BODY_I]]: +// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] 
to float // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]] @@ -7018,32 +7928,33 @@ extern "C" __device__ double test_y1(double x) { // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]] // FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] -// FINITEONLY: _ZL3ynfif.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] +// FINITEONLY: [[_ZL3YNFIF_EXIT]]: +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret float [[RETVAL_0_I]] // -// APPROX-LABEL: @test_ynf( -// APPROX-NEXT: entry: -// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// APPROX-LABEL: define dso_local float @test_ynf( +// APPROX-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] -// APPROX: if.then.i: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]] -// APPROX: if.then2.i: +// APPROX: [[IF_THEN_I]]: +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call 
contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] +// APPROX: [[IF_THEN2_I]]: // APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL3YNFIF_EXIT]] -// APPROX: if.end4.i: +// APPROX-NEXT: br label %[[_ZL3YNFIF_EXIT]] +// APPROX: [[IF_END4_I]]: // APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]] -// APPROX: for.body.i: -// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] +// APPROX: [[FOR_BODY_I]]: +// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] @@ -7051,32 +7962,33 @@ extern "C" __device__ double test_y1(double x) { // APPROX-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 
// APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] -// APPROX: _ZL3ynfif.exit: -// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] +// APPROX: [[_ZL3YNFIF_EXIT]]: +// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // APPROX-NEXT: ret float [[RETVAL_0_I]] // -// NCRDIV-LABEL: @test_ynf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// NCRDIV-LABEL: define dso_local float @test_ynf( +// NCRDIV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] -// NCRDIV: if.then.i: -// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]] -// NCRDIV: if.then2.i: +// NCRDIV: [[IF_THEN_I]]: +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] +// NCRDIV: [[IF_THEN2_I]]: // NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL3YNFIF_EXIT]] -// NCRDIV: if.end4.i: +// NCRDIV-NEXT: 
br label %[[_ZL3YNFIF_EXIT]] +// NCRDIV: [[IF_END4_I]]: // NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// NCRDIV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]] -// NCRDIV: for.body.i: -// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] +// NCRDIV: [[FOR_BODY_I]]: +// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]], !fpmath [[META12]] @@ -7084,32 +7996,33 @@ extern "C" __device__ double test_y1(double x) { // NCRDIV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]] -// NCRDIV: _ZL3ynfif.exit: -// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] 
], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]] +// NCRDIV: [[_ZL3YNFIF_EXIT]]: +// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // NCRDIV-NEXT: ret float [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_ynf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func float @test_ynf( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV: [[IF_THEN_I]]: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label %[[_ZL3YNFIF_EXIT:.*]] +// AMDGCNSPIRV: [[IF_THEN2_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL3YNFIF_EXIT]] -// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: br label %[[_ZL3YNFIF_EXIT]] +// AMDGCNSPIRV: [[IF_END4_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract 
spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]] -// AMDGCNSPIRV: for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL3YNFIF_EXIT]] +// AMDGCNSPIRV: [[FOR_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] @@ -7117,36 +8030,37 @@ extern "C" __device__ double test_y1(double x) { // AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] // AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] -// AMDGCNSPIRV: _ZL3ynfif.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ 
[[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL3YNFIF_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] +// AMDGCNSPIRV: [[_ZL3YNFIF_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]] // extern "C" __device__ float test_ynf(int x, float y) { return ynf(x, y); } -// DEFAULT-LABEL: @test_yn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// DEFAULT-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// DEFAULT-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// DEFAULT-LABEL: define dso_local double @test_yn( +// DEFAULT-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// DEFAULT-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// DEFAULT-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // DEFAULT-NEXT: ] -// DEFAULT: if.then.i: -// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL2YNID_EXIT:%.*]] -// DEFAULT: if.then2.i: +// DEFAULT: [[IF_THEN_I]]: +// DEFAULT-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] +// DEFAULT-NEXT: br label %[[_ZL2YNID_EXIT:.*]] +// DEFAULT: [[IF_THEN2_I]]: // DEFAULT-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] -// DEFAULT-NEXT: br label [[_ZL2YNID_EXIT]] -// DEFAULT: if.end4.i: +// DEFAULT-NEXT: br label %[[_ZL2YNID_EXIT]] +// DEFAULT: [[IF_END4_I]]: // DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] // 
DEFAULT-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] // DEFAULT-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// DEFAULT-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]] -// DEFAULT: for.body.i: -// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// DEFAULT-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] +// DEFAULT: [[FOR_BODY_I]]: +// DEFAULT-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -7154,32 +8068,33 @@ extern "C" __device__ float test_ynf(int x, float y) { // DEFAULT-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // DEFAULT-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] -// DEFAULT: _ZL2ynid.exit: -// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], 
!llvm.loop [[LOOP25:![0-9]+]] +// DEFAULT: [[_ZL2YNID_EXIT]]: +// DEFAULT-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // DEFAULT-NEXT: ret double [[RETVAL_0_I]] // -// FINITEONLY-LABEL: @test_yn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// FINITEONLY-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// FINITEONLY-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test_yn( +// FINITEONLY-SAME: i32 noundef [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// FINITEONLY-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// FINITEONLY-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // FINITEONLY-NEXT: ] -// FINITEONLY: if.then.i: -// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]] -// FINITEONLY-NEXT: br label [[_ZL2YNID_EXIT:%.*]] -// FINITEONLY: if.then2.i: +// FINITEONLY: [[IF_THEN_I]]: +// FINITEONLY-NEXT: [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] +// FINITEONLY-NEXT: br label %[[_ZL2YNID_EXIT:.*]] +// FINITEONLY: [[IF_THEN2_I]]: // FINITEONLY-NEXT: [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] -// FINITEONLY-NEXT: br label [[_ZL2YNID_EXIT]] -// FINITEONLY: if.end4.i: +// FINITEONLY-NEXT: br label %[[_ZL2YNID_EXIT]] +// FINITEONLY: [[IF_END4_I]]: // FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan 
inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]] -// FINITEONLY: for.body.i: -// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// FINITEONLY-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] +// FINITEONLY: [[FOR_BODY_I]]: +// FINITEONLY-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[CONV_I]], [[Y]] @@ -7187,32 +8102,33 @@ extern "C" __device__ float test_ynf(int x, float y) { // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract double [[MUL8_I]], [[__X0_0_I2]] // FINITEONLY-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] -// FINITEONLY: _ZL2ynid.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ 
[[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] +// FINITEONLY: [[_ZL2YNID_EXIT]]: +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret double [[RETVAL_0_I]] // -// APPROX-LABEL: @test_yn( -// APPROX-NEXT: entry: -// APPROX-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// APPROX-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// APPROX-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// APPROX-LABEL: define dso_local double @test_yn( +// APPROX-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// APPROX-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// APPROX-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // APPROX-NEXT: ] -// APPROX: if.then.i: -// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL2YNID_EXIT:%.*]] -// APPROX: if.then2.i: +// APPROX: [[IF_THEN_I]]: +// APPROX-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] +// APPROX-NEXT: br label %[[_ZL2YNID_EXIT:.*]] +// APPROX: [[IF_THEN2_I]]: // APPROX-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] -// APPROX-NEXT: br label [[_ZL2YNID_EXIT]] -// APPROX: if.end4.i: +// APPROX-NEXT: br label %[[_ZL2YNID_EXIT]] +// APPROX: [[IF_END4_I]]: // APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] // APPROX-NEXT: 
[[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// APPROX-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]] -// APPROX: for.body.i: -// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// APPROX-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] +// APPROX: [[FOR_BODY_I]]: +// APPROX-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -7220,32 +8136,33 @@ extern "C" __device__ float test_ynf(int x, float y) { // APPROX-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // APPROX-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // APPROX-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] -// APPROX: _ZL2ynid.exit: -// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// APPROX-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] +// APPROX: [[_ZL2YNID_EXIT]]: +// APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ 
[[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // APPROX-NEXT: ret double [[RETVAL_0_I]] // -// NCRDIV-LABEL: @test_yn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// NCRDIV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// NCRDIV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// NCRDIV-LABEL: define dso_local double @test_yn( +// NCRDIV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// NCRDIV-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// NCRDIV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // NCRDIV-NEXT: ] -// NCRDIV: if.then.i: -// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL2YNID_EXIT:%.*]] -// NCRDIV: if.then2.i: +// NCRDIV: [[IF_THEN_I]]: +// NCRDIV-NEXT: [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] +// NCRDIV-NEXT: br label %[[_ZL2YNID_EXIT:.*]] +// NCRDIV: [[IF_THEN2_I]]: // NCRDIV-NEXT: [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] -// NCRDIV-NEXT: br label [[_ZL2YNID_EXIT]] -// NCRDIV: if.end4.i: +// NCRDIV-NEXT: br label %[[_ZL2YNID_EXIT]] +// NCRDIV: [[IF_END4_I]]: // NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]] // NCRDIV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// NCRDIV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]] -// NCRDIV: for.body.i: -// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], 
[[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// NCRDIV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] +// NCRDIV: [[FOR_BODY_I]]: +// NCRDIV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// NCRDIV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// NCRDIV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // NCRDIV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -7253,32 +8170,33 @@ extern "C" __device__ float test_ynf(int x, float y) { // NCRDIV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // NCRDIV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // NCRDIV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP27:![0-9]+]] -// NCRDIV: _ZL2ynid.exit: -// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// NCRDIV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP27:![0-9]+]] +// NCRDIV: [[_ZL2YNID_EXIT]]: +// NCRDIV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // NCRDIV-NEXT: ret double [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_yn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label 
[[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func double @test_yn( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: switch i32 [[X]], label %[[IF_END4_I:.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label %[[IF_THEN_I:.*]] +// AMDGCNSPIRV-NEXT: i32 1, label %[[IF_THEN2_I:.*]] // AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV: [[IF_THEN_I]]: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label %[[_ZL2YNID_EXIT:.*]] +// AMDGCNSPIRV: [[IF_THEN2_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT]] -// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: br label %[[_ZL2YNID_EXIT]] +// AMDGCNSPIRV: [[IF_END4_I]]: // AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]] -// AMDGCNSPIRV: for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], 
[[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label %[[FOR_BODY_I:.*]], label %[[_ZL2YNID_EXIT]] +// AMDGCNSPIRV: [[FOR_BODY_I]]: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], %[[FOR_BODY_I]] ], [ 1, %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], %[[FOR_BODY_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], %[[FOR_BODY_I]] ], [ [[CALL_I_I]], %[[IF_END4_I]] ] // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] @@ -7286,71 +8204,81 @@ extern "C" __device__ float test_ynf(int x, float y) { // AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] // AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 // AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]] -// AMDGCNSPIRV: _ZL2ynid.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label %[[_ZL2YNID_EXIT]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]] +// AMDGCNSPIRV: [[_ZL2YNID_EXIT]]: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], %[[IF_THEN_I]] ], [ [[CALL_I22_I]], %[[IF_THEN2_I]] ], [ [[CALL_I21_I]], %[[IF_END4_I]] ], [ [[SUB_I]], %[[FOR_BODY_I]] ] // AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]] // extern "C" __device__ double test_yn(int x, double y) { return yn(x, y); } -// DEFAULT-LABEL: @test___cosf( -// 
DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test___cosf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test___cosf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___cosf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test___cosf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test___cosf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test___cosf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test___cosf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { 
+// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___cosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___cosf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test___cosf(float x) { return __cosf(x); } -// DEFAULT-LABEL: @test___exp10f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000 +// DEFAULT-LABEL: define dso_local noundef float @test___exp10f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000 // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test___exp10f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X:%.*]], 0x400A934F00000000 +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___exp10f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X]], 0x400A934F00000000 // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.exp2.f32(float 
[[MUL_I]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test___exp10f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000 +// APPROX-LABEL: define dso_local noundef float @test___exp10f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000 // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test___exp10f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000 +// NCRDIV-LABEL: define dso_local noundef float @test___exp10f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000 // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___exp10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000 +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___exp10f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x400A934F00000000 // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // @@ -7358,33 +8286,38 @@ extern "C" __device__ float test___exp10f(float x) { return __exp10f(x); } -// DEFAULT-LABEL: @test___expf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000 +// DEFAULT-LABEL: define dso_local 
noundef float @test___expf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000 // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test___expf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X:%.*]], 0x3FF7154760000000 +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___expf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X]], 0x3FF7154760000000 // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test___expf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000 +// APPROX-LABEL: define dso_local noundef float @test___expf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000 // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test___expf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000 +// NCRDIV-LABEL: define dso_local noundef float @test___expf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000 // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float 
[[MUL_I]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___expf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000 +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___expf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], 0x3FF7154760000000 // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // @@ -7392,389 +8325,454 @@ extern "C" __device__ float test___expf(float x) { return __expf(x); } -// DEFAULT-LABEL: @test___fadd_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef float @test___fadd_rn( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]] // DEFAULT-NEXT: ret float [[ADD_I]] // -// FINITEONLY-LABEL: @test___fadd_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract float [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___fadd_rn( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract float [[X]], [[Y]] // FINITEONLY-NEXT: ret float [[ADD_I]] // -// APPROX-LABEL: @test___fadd_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef float @test___fadd_rn( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) 
local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]] // APPROX-NEXT: ret float [[ADD_I]] // -// NCRDIV-LABEL: @test___fadd_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] +// NCRDIV-LABEL: define dso_local noundef float @test___fadd_rn( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]] // NCRDIV-NEXT: ret float [[ADD_I]] // -// AMDGCNSPIRV-LABEL: @test___fadd_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fadd_rn( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret float [[ADD_I]] // extern "C" __device__ float test___fadd_rn(float x, float y) { return __fadd_rn(x, y); } -// DEFAULT-LABEL: @test___fdividef( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef float @test___fdividef( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]] // DEFAULT-NEXT: ret float [[DIV_I]] // -// FINITEONLY-LABEL: @test___fdividef( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___fdividef( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] 
{ +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[X]], [[Y]] // FINITEONLY-NEXT: ret float [[DIV_I]] // -// APPROX-LABEL: @test___fdividef( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef float @test___fdividef( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]] // APPROX-NEXT: ret float [[DIV_I]] // -// NCRDIV-LABEL: @test___fdividef( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]], !fpmath [[META12]] +// NCRDIV-LABEL: define dso_local noundef float @test___fdividef( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]], !fpmath [[META12]] // NCRDIV-NEXT: ret float [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___fdividef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fdividef( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret float [[DIV_I]] // extern "C" __device__ float test___fdividef(float x, float y) { return __fdividef(x, y); } -// DEFAULT-LABEL: @test__fmaf_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test__fmaf_rn( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef 
[[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test__fmaf_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]], float nofpclass(nan inf) [[Z:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test__fmaf_rn( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.fma.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]], float nofpclass(nan inf) [[Z]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test__fmaf_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test__fmaf_rn( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test__fmaf_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test__fmaf_rn( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: 
[[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test__fmaf_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test__fmaf_rn( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test__fmaf_rn(float x, float y, float z) { return __fmaf_rn(x, y, z); } -// DEFAULT-LABEL: @test___fmul_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef float @test___fmul_rn( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]] // DEFAULT-NEXT: ret float [[MUL_I]] // -// FINITEONLY-LABEL: @test___fmul_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___fmul_rn( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[X]], [[Y]] // FINITEONLY-NEXT: ret float [[MUL_I]] // -// APPROX-LABEL: @test___fmul_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract 
float [[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef float @test___fmul_rn( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]] // APPROX-NEXT: ret float [[MUL_I]] // -// NCRDIV-LABEL: @test___fmul_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]] +// NCRDIV-LABEL: define dso_local noundef float @test___fmul_rn( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]] // NCRDIV-NEXT: ret float [[MUL_I]] // -// AMDGCNSPIRV-LABEL: @test___fmul_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fmul_rn( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret float [[MUL_I]] // extern "C" __device__ float test___fmul_rn(float x, float y) { return __fmul_rn(x, y); } -// DEFAULT-LABEL: @test___frcp_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]] +// DEFAULT-LABEL: define dso_local noundef float @test___frcp_rn( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]] // DEFAULT-NEXT: ret float [[DIV_I]] // -// FINITEONLY-LABEL: @test___frcp_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float 1.000000e+00, [[X:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float 
@test___frcp_rn( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float 1.000000e+00, [[X]] // FINITEONLY-NEXT: ret float [[DIV_I]] // -// APPROX-LABEL: @test___frcp_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]] +// APPROX-LABEL: define dso_local noundef float @test___frcp_rn( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]] // APPROX-NEXT: ret float [[DIV_I]] // -// NCRDIV-LABEL: @test___frcp_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]], !fpmath [[META12]] +// NCRDIV-LABEL: define dso_local noundef float @test___frcp_rn( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]], !fpmath [[META12]] // NCRDIV-NEXT: ret float [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___frcp_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___frcp_rn( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X]] // AMDGCNSPIRV-NEXT: ret float [[DIV_I]] // extern "C" __device__ float test___frcp_rn(float x) { return __frcp_rn(x); } -// DEFAULT-LABEL: @test___frsqrt_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test___frsqrt_rn( +// DEFAULT-SAME: float noundef [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test___frsqrt_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.rsq.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___frsqrt_rn( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.rsq.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test___frsqrt_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test___frsqrt_rn( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test___frsqrt_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test___frsqrt_rn( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___frsqrt_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float 
@test___frsqrt_rn( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.rsq.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test___frsqrt_rn(float x) { return __frsqrt_rn(x); } -// DEFAULT-LABEL: @test___fsqrt_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// DEFAULT-LABEL: define dso_local noundef float @test___fsqrt_rn( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test___fsqrt_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___fsqrt_rn( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test___fsqrt_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// APPROX-LABEL: define dso_local noundef float @test___fsqrt_rn( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test___fsqrt_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// NCRDIV-LABEL: define dso_local noundef float @test___fsqrt_rn( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR14]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___fsqrt_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fsqrt_rn( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR4]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sqrt_f32(float noundef [[X]]) #[[ATTR12]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test___fsqrt_rn(float x) { return __fsqrt_rn(x); } -// DEFAULT-LABEL: @test___fsub_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef float @test___fsub_rn( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]] // DEFAULT-NEXT: ret float [[SUB_I]] // -// FINITEONLY-LABEL: @test___fsub_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[SUB_I:%.*]] = fsub nnan ninf contract float [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define 
dso_local nofpclass(nan inf) float @test___fsub_rn( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[SUB_I:%.*]] = fsub nnan ninf contract float [[X]], [[Y]] // FINITEONLY-NEXT: ret float [[SUB_I]] // -// APPROX-LABEL: @test___fsub_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef float @test___fsub_rn( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]] // APPROX-NEXT: ret float [[SUB_I]] // -// NCRDIV-LABEL: @test___fsub_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] +// NCRDIV-LABEL: define dso_local noundef float @test___fsub_rn( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]] // NCRDIV-NEXT: ret float [[SUB_I]] // -// AMDGCNSPIRV-LABEL: @test___fsub_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___fsub_rn( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret float [[SUB_I]] // extern "C" __device__ float test___fsub_rn(float x, float y) { return __fsub_rn(x, y); } -// DEFAULT-LABEL: @test___log10f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef 
float @test___log10f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test___log10f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___log10f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log10.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test___log10f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test___log10f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test___log10f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test___log10f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___log10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef 
float @test___log10f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test___log10f(float x) { return __log10f(x); } -// DEFAULT-LABEL: @test___log2f( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test___log2f( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test___log2f( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.log.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___log2f( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.amdgcn.log.f32(float nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test___log2f( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test___log2f( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test___log2f( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: 
[[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test___log2f( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___log2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.log.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___log2f( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.log.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test___log2f(float x) { return __log2f(x); } -// DEFAULT-LABEL: @test___logf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test___logf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test___logf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___logf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.log.f32(float nofpclass(nan inf) 
[[X]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test___logf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test___logf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test___logf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test___logf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___logf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___logf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test___logf(float x) { return __logf(x); } -// DEFAULT-LABEL: @test___powf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]] +// DEFAULT-LABEL: define dso_local noundef float @test___powf( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test___powf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___powf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) [[Y]]) #[[ATTR15]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test___powf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]] +// APPROX-LABEL: define dso_local noundef float @test___powf( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test___powf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]] +// NCRDIV-LABEL: define dso_local noundef float @test___powf( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR5]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X]], 
float noundef [[Y]]) #[[ATTR15]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___powf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___powf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR5]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X]], float noundef [[Y]]) #[[ATTR13]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test___powf(float x, float y) { return __powf(x, y); } -// DEFAULT-LABEL: @test___saturatef( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00 +// DEFAULT-LABEL: define dso_local noundef float @test___saturatef( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00 // DEFAULT-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00 // DEFAULT-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]] // DEFAULT-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] // DEFAULT-NEXT: ret float [[COND5_I]] // -// FINITEONLY-LABEL: @test___saturatef( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CMP_I:%.*]] = fcmp nnan ninf contract olt float [[X:%.*]], 0.000000e+00 +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___saturatef( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CMP_I:%.*]] = fcmp nnan ninf contract olt float 
[[X]], 0.000000e+00 // FINITEONLY-NEXT: [[CMP1_I:%.*]] = fcmp nnan ninf contract ogt float [[X]], 1.000000e+00 // FINITEONLY-NEXT: [[COND_I:%.*]] = select nnan ninf contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]] // FINITEONLY-NEXT: [[COND5_I:%.*]] = select nnan ninf contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] // FINITEONLY-NEXT: ret float [[COND5_I]] // -// APPROX-LABEL: @test___saturatef( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00 +// APPROX-LABEL: define dso_local noundef float @test___saturatef( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00 // APPROX-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00 // APPROX-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]] // APPROX-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] // APPROX-NEXT: ret float [[COND5_I]] // -// NCRDIV-LABEL: @test___saturatef( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00 +// NCRDIV-LABEL: define dso_local noundef float @test___saturatef( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00 // NCRDIV-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00 // NCRDIV-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]] // NCRDIV-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] // NCRDIV-NEXT: ret float [[COND5_I]] // -// AMDGCNSPIRV-LABEL: @test___saturatef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00 +// AMDGCNSPIRV-LABEL: define spir_func noundef 
float @test___saturatef( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X]], 0.000000e+00 // AMDGCNSPIRV-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00 // AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]] // AMDGCNSPIRV-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] @@ -7784,114 +8782,129 @@ extern "C" __device__ float test___saturatef(float x) { return __saturatef(x); } -// DEFAULT-LABEL: @test___sincosf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] -// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] +// DEFAULT-LABEL: define dso_local void @test___sincosf( +// DEFAULT-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// DEFAULT-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] -// DEFAULT-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// DEFAULT-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // DEFAULT-NEXT: ret void // -// FINITEONLY-LABEL: @test___sincosf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] -// FINITEONLY-NEXT: store float [[CALL_I]], ptr 
[[Y:%.*]], align 4, !tbaa [[TBAA16]] +// FINITEONLY-LABEL: define dso_local void @test___sincosf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] +// FINITEONLY-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: [[CALL1_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] -// FINITEONLY-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// FINITEONLY-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // FINITEONLY-NEXT: ret void // -// APPROX-LABEL: @test___sincosf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] -// APPROX-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]] +// APPROX-LABEL: define dso_local void @test___sincosf( +// APPROX-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// APPROX-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] -// APPROX-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] +// APPROX-NEXT: store float [[CALL1_I]], ptr 
[[Z]], align 4, !tbaa [[FLOAT_TBAA16]] // APPROX-NEXT: ret void // -// NCRDIV-LABEL: @test___sincosf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] -// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]] +// NCRDIV-LABEL: define dso_local void @test___sincosf( +// NCRDIV-SAME: float noundef [[X:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] +// NCRDIV-NEXT: store float [[CALL_I]], ptr [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] -// NCRDIV-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// NCRDIV-NEXT: store float [[CALL1_I]], ptr [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] // NCRDIV-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test___sincosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-LABEL: define spir_func void @test___sincosf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr 
addrspace(4) [[Y]], align 4, !tbaa [[FLOAT_TBAA17]] // AMDGCNSPIRV-NEXT: [[CALL1_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL1_I]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: store float [[CALL1_I]], ptr addrspace(4) [[Z]], align 4, !tbaa [[FLOAT_TBAA17]] // AMDGCNSPIRV-NEXT: ret void // extern "C" __device__ void test___sincosf(float x, float *y, float *z) { __sincosf(x, y, z); } -// DEFAULT-LABEL: @test___sinf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local noundef float @test___sinf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: ret float [[CALL_I]] // -// FINITEONLY-LABEL: @test___sinf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test___sinf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: ret float [[CALL_I]] // -// APPROX-LABEL: @test___sinf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local noundef float @test___sinf( +// APPROX-SAME: float 
noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: ret float [[CALL_I]] // -// NCRDIV-LABEL: @test___sinf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local noundef float @test___sinf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___sinf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___sinf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] // extern "C" __device__ float test___sinf(float x) { return __sinf(x); } -// DEFAULT-LABEL: @test___tanf( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// DEFAULT-LABEL: define dso_local float @test___tanf( +// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float 
@__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // DEFAULT-NEXT: ret float [[MUL_I]] // -// FINITEONLY-LABEL: @test___tanf( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test___tanf( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[CALL_I3_I]], [[TMP0]] // FINITEONLY-NEXT: ret float [[MUL_I]] // -// APPROX-LABEL: @test___tanf( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// APPROX-LABEL: define dso_local float @test___tanf( +// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract float 
@llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // APPROX-NEXT: ret float [[MUL_I]] // -// NCRDIV-LABEL: @test___tanf( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]] +// NCRDIV-LABEL: define dso_local float @test___tanf( +// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // NCRDIV-NEXT: ret float [[MUL_I]] // -// AMDGCNSPIRV-LABEL: @test___tanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I3_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-LABEL: define spir_func float @test___tanf( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[CALL_I3_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) // AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] @@ -7901,319 +8914,491 @@ extern "C" __device__ float test___tanf(float x) { return __tanf(x); } -// DEFAULT-LABEL: 
@test___dadd_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef double @test___dadd_rn( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]] // DEFAULT-NEXT: ret double [[ADD_I]] // -// FINITEONLY-LABEL: @test___dadd_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract double [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test___dadd_rn( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[ADD_I:%.*]] = fadd nnan ninf contract double [[X]], [[Y]] // FINITEONLY-NEXT: ret double [[ADD_I]] // -// APPROX-LABEL: @test___dadd_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef double @test___dadd_rn( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]] // APPROX-NEXT: ret double [[ADD_I]] // -// NCRDIV-LABEL: @test___dadd_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] +// NCRDIV-LABEL: define dso_local noundef double @test___dadd_rn( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]] // NCRDIV-NEXT: ret double [[ADD_I]] // -// AMDGCNSPIRV-LABEL: @test___dadd_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] 
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___dadd_rn( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret double [[ADD_I]] // extern "C" __device__ double test___dadd_rn(double x, double y) { return __dadd_rn(x, y); } -// DEFAULT-LABEL: @test___ddiv_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef double @test___ddiv_rn( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]] // DEFAULT-NEXT: ret double [[DIV_I]] // -// FINITEONLY-LABEL: @test___ddiv_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test___ddiv_rn( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[X]], [[Y]] // FINITEONLY-NEXT: ret double [[DIV_I]] // -// APPROX-LABEL: @test___ddiv_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef double @test___ddiv_rn( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]] // APPROX-NEXT: ret double [[DIV_I]] // -// NCRDIV-LABEL: @test___ddiv_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] 
+// NCRDIV-LABEL: define dso_local noundef double @test___ddiv_rn( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]] // NCRDIV-NEXT: ret double [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___ddiv_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___ddiv_rn( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret double [[DIV_I]] // extern "C" __device__ double test___ddiv_rn(double x, double y) { return __ddiv_rn(x, y); } -// DEFAULT-LABEL: @test___dmul_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]] +// DEFAULT-LABEL: define dso_local noundef double @test___dmul_rn( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]] // DEFAULT-NEXT: ret double [[MUL_I]] // -// FINITEONLY-LABEL: @test___dmul_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[X:%.*]], [[Y:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @test___dmul_rn( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[X]], [[Y]] // FINITEONLY-NEXT: ret double [[MUL_I]] // -// APPROX-LABEL: @test___dmul_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double 
[[X:%.*]], [[Y:%.*]] +// APPROX-LABEL: define dso_local noundef double @test___dmul_rn( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]] // APPROX-NEXT: ret double [[MUL_I]] // -// NCRDIV-LABEL: @test___dmul_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]] +// NCRDIV-LABEL: define dso_local noundef double @test___dmul_rn( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]] // NCRDIV-NEXT: ret double [[MUL_I]] // -// AMDGCNSPIRV-LABEL: @test___dmul_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___dmul_rn( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X]], [[Y]] // AMDGCNSPIRV-NEXT: ret double [[MUL_I]] // extern "C" __device__ double test___dmul_rn(double x, double y) { return __dmul_rn(x, y); } -// DEFAULT-LABEL: @test___drcp_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] +// DEFAULT-LABEL: define dso_local noundef double @test___drcp_rn( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]] // DEFAULT-NEXT: ret double [[DIV_I]] // -// FINITEONLY-LABEL: @test___drcp_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[X:%.*]] +// FINITEONLY-LABEL: define dso_local nofpclass(nan 
inf) double @test___drcp_rn( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[X]] // FINITEONLY-NEXT: ret double [[DIV_I]] // -// APPROX-LABEL: @test___drcp_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] +// APPROX-LABEL: define dso_local noundef double @test___drcp_rn( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]] // APPROX-NEXT: ret double [[DIV_I]] // -// NCRDIV-LABEL: @test___drcp_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] +// NCRDIV-LABEL: define dso_local noundef double @test___drcp_rn( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]] // NCRDIV-NEXT: ret double [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___drcp_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___drcp_rn( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X]] // AMDGCNSPIRV-NEXT: ret double [[DIV_I]] // extern "C" __device__ double test___drcp_rn(double x) { return __drcp_rn(x); } -// DEFAULT-LABEL: @test___dsqrt_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test___dsqrt_rn( +// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr 
#[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test___dsqrt_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test___dsqrt_rn( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test___dsqrt_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test___dsqrt_rn( +// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test___dsqrt_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test___dsqrt_rn( +// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___dsqrt_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test___dsqrt_rn( +// AMDGCNSPIRV-SAME: double 
noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test___dsqrt_rn(double x) { return __dsqrt_rn(x); } -// DEFAULT-LABEL: @test__fma_rn( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test__fma_rn( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test__fma_rn( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]], double nofpclass(nan inf) [[Z:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test__fma_rn( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.fma.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]], double nofpclass(nan inf) [[Z]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test__fma_rn( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test__fma_rn( +// 
APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test__fma_rn( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test__fma_rn( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test__fma_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test__fma_rn( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X]], double [[Y]], double [[Z]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test__fma_rn(double x, double y, double z) { return __fma_rn(x, y, z); } -// DEFAULT-LABEL: @test_float_min( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_float_min( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// 
DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_float_min( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_float_min( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_float_min( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_float_min( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_float_min( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_float_min( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X]], float [[Y]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_float_min( -// AMDGCNSPIRV-NEXT: entry: -// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_float_min( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X]], float [[Y]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_float_min(float x, float y) { return min(x, y); } -// DEFAULT-LABEL: @test_float_max( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef float @test_float_max( +// DEFAULT-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]]) // DEFAULT-NEXT: ret float [[TMP0]] // -// FINITEONLY-LABEL: @test_float_max( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X:%.*]], float nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_float_max( +// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) [[X]], float nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret float [[TMP0]] // -// APPROX-LABEL: @test_float_max( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float 
[[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef float @test_float_max( +// APPROX-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]]) // APPROX-NEXT: ret float [[TMP0]] // -// NCRDIV-LABEL: @test_float_max( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef float @test_float_max( +// NCRDIV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X]], float [[Y]]) // NCRDIV-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_float_max( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef float @test_float_max( +// AMDGCNSPIRV-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X]], float [[Y]]) // AMDGCNSPIRV-NEXT: ret float [[TMP0]] // extern "C" __device__ float test_float_max(float x, float y) { return max(x, y); } -// DEFAULT-LABEL: @test_double_min( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_double_min( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef 
double @llvm.minnum.f64(double [[X]], double [[Y]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_double_min( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.minnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_double_min( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.minnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_double_min( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// APPROX-LABEL: define dso_local noundef double @test_double_min( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_double_min( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_double_min( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X]], double [[Y]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_double_min( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef 
addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_double_min( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X]], double [[Y]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_double_min(double x, double y) { return min(x, y); } -// DEFAULT-LABEL: @test_double_max( -// DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// DEFAULT-LABEL: define dso_local noundef double @test_double_max( +// DEFAULT-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]]) // DEFAULT-NEXT: ret double [[TMP0]] // -// FINITEONLY-LABEL: @test_double_max( -// FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X:%.*]], double nofpclass(nan inf) [[Y:%.*]]) +// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) double @test_double_max( +// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// FINITEONLY-NEXT: [[ENTRY:.*:]] +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.maxnum.f64(double nofpclass(nan inf) [[X]], double nofpclass(nan inf) [[Y]]) // FINITEONLY-NEXT: ret double [[TMP0]] // -// APPROX-LABEL: @test_double_max( -// APPROX-NEXT: entry: -// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// 
APPROX-LABEL: define dso_local noundef double @test_double_max( +// APPROX-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// APPROX-NEXT: [[ENTRY:.*:]] +// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]]) // APPROX-NEXT: ret double [[TMP0]] // -// NCRDIV-LABEL: @test_double_max( -// NCRDIV-NEXT: entry: -// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// NCRDIV-LABEL: define dso_local noundef double @test_double_max( +// NCRDIV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// NCRDIV-NEXT: [[ENTRY:.*:]] +// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X]], double [[Y]]) // NCRDIV-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_double_max( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef double @test_double_max( +// AMDGCNSPIRV-SAME: double noundef [[X:%.*]], double noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X]], double [[Y]]) // AMDGCNSPIRV-NEXT: ret double [[TMP0]] // extern "C" __device__ double test_double_max(double x, double y) { return max(x, y); } -// CHECK-LABEL: @test_int_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +// CHECK-LABEL: define dso_local noundef i32 @test_int_min( +// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smin.i32(i32 [[X]], i32 
[[Y]]) // CHECK-NEXT: ret i32 [[COND_I]] // -// AMDGCNSPIRV-LABEL: @test_int_min( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_int_min( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smin.i32(i32 [[X]], i32 [[Y]]) // AMDGCNSPIRV-NEXT: ret i32 [[COND_I]] // extern "C" __device__ int test_int_min(int x, int y) { return min(x, y); } -// CHECK-LABEL: @test_int_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +// CHECK-LABEL: define dso_local noundef i32 @test_int_max( +// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smax.i32(i32 [[X]], i32 [[Y]]) // CHECK-NEXT: ret i32 [[COND_I]] // -// AMDGCNSPIRV-LABEL: @test_int_max( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @test_int_max( +// AMDGCNSPIRV-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smax.i32(i32 [[X]], i32 [[Y]]) // AMDGCNSPIRV-NEXT: ret i32 [[COND_I]] // extern "C" __device__ int test_int_max(int x, int y) { return max(x, y); } +//. 
+// DEFAULT: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// DEFAULT: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// DEFAULT: [[META6]] = !{!"Simple C++ TBAA"} +// DEFAULT: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} +// DEFAULT: [[META8]] = !{!"llvm.loop.mustprogress"} +// DEFAULT: [[META9]] = !{!"llvm.loop.unroll.disable"} +// DEFAULT: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} +// DEFAULT: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} +// DEFAULT: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// DEFAULT: [[META13]] = !{!"int", [[META5]], i64 0} +// DEFAULT: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]} +// DEFAULT: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +// DEFAULT: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// DEFAULT: [[META17]] = !{!"float", [[META5]], i64 0} +// DEFAULT: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// DEFAULT: [[META19]] = !{!"double", [[META5]], i64 0} +// DEFAULT: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]} +// DEFAULT: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} +// DEFAULT: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} +// DEFAULT: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} +// DEFAULT: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} +// DEFAULT: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]} +//. 
+// FINITEONLY: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// FINITEONLY: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// FINITEONLY: [[META6]] = !{!"Simple C++ TBAA"} +// FINITEONLY: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} +// FINITEONLY: [[META8]] = !{!"llvm.loop.mustprogress"} +// FINITEONLY: [[META9]] = !{!"llvm.loop.unroll.disable"} +// FINITEONLY: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} +// FINITEONLY: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} +// FINITEONLY: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// FINITEONLY: [[META13]] = !{!"int", [[META5]], i64 0} +// FINITEONLY: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]} +// FINITEONLY: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +// FINITEONLY: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// FINITEONLY: [[META17]] = !{!"float", [[META5]], i64 0} +// FINITEONLY: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// FINITEONLY: [[META19]] = !{!"double", [[META5]], i64 0} +// FINITEONLY: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]} +// FINITEONLY: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} +// FINITEONLY: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} +// FINITEONLY: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} +// FINITEONLY: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} +// FINITEONLY: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]} +//. 
+// APPROX: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// APPROX: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// APPROX: [[META6]] = !{!"Simple C++ TBAA"} +// APPROX: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} +// APPROX: [[META8]] = !{!"llvm.loop.mustprogress"} +// APPROX: [[META9]] = !{!"llvm.loop.unroll.disable"} +// APPROX: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} +// APPROX: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} +// APPROX: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// APPROX: [[META13]] = !{!"int", [[META5]], i64 0} +// APPROX: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]], [[META9]]} +// APPROX: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +// APPROX: [[FLOAT_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// APPROX: [[META17]] = !{!"float", [[META5]], i64 0} +// APPROX: [[DOUBLE_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// APPROX: [[META19]] = !{!"double", [[META5]], i64 0} +// APPROX: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]} +// APPROX: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} +// APPROX: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} +// APPROX: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} +// APPROX: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} +// APPROX: [[LOOP25]] = distinct !{[[LOOP25]], [[META8]], [[META9]]} +//. 
+// NCRDIV: [[CHAR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +// NCRDIV: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// NCRDIV: [[META6]] = !{!"Simple C++ TBAA"} +// NCRDIV: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} +// NCRDIV: [[META8]] = !{!"llvm.loop.mustprogress"} +// NCRDIV: [[META9]] = !{!"llvm.loop.unroll.disable"} +// NCRDIV: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} +// NCRDIV: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]} +// NCRDIV: [[META12]] = !{float 2.500000e+00} +// NCRDIV: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// NCRDIV: [[META14]] = !{!"int", [[META5]], i64 0} +// NCRDIV: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +// NCRDIV: [[LOOP16]] = distinct !{[[LOOP16]], [[META8]], [[META9]]} +// NCRDIV: [[FLOAT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// NCRDIV: [[META18]] = !{!"float", [[META5]], i64 0} +// NCRDIV: [[DOUBLE_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} +// NCRDIV: [[META20]] = !{!"double", [[META5]], i64 0} +// NCRDIV: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} +// NCRDIV: [[LOOP22]] = distinct !{[[LOOP22]], [[META8]], [[META9]]} +// NCRDIV: [[LOOP23]] = distinct !{[[LOOP23]], [[META8]], [[META9]]} +// NCRDIV: [[LOOP24]] = distinct !{[[LOOP24]], [[META8]], [[META9]]} +// NCRDIV: [[META25]] = !{float 3.000000e+00} +// NCRDIV: [[LOOP26]] = distinct !{[[LOOP26]], [[META8]], [[META9]]} +// NCRDIV: [[LOOP27]] = distinct !{[[LOOP27]], [[META8]], [[META9]]} +//. 
+// AMDGCNSPIRV: [[CHAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// AMDGCNSPIRV: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +// AMDGCNSPIRV: [[META7]] = !{!"Simple C++ TBAA"} +// AMDGCNSPIRV: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]} +// AMDGCNSPIRV: [[META9]] = !{!"llvm.loop.mustprogress"} +// AMDGCNSPIRV: [[META10]] = !{!"llvm.loop.unroll.disable"} +// AMDGCNSPIRV: [[LOOP11]] = distinct !{[[LOOP11]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// AMDGCNSPIRV: [[META14]] = !{!"int", [[META6]], i64 0} +// AMDGCNSPIRV: [[LOOP15]] = distinct !{[[LOOP15]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[LOOP16]] = distinct !{[[LOOP16]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[FLOAT_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// AMDGCNSPIRV: [[META18]] = !{!"float", [[META6]], i64 0} +// AMDGCNSPIRV: [[DOUBLE_TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} +// AMDGCNSPIRV: [[META20]] = !{!"double", [[META6]], i64 0} +// AMDGCNSPIRV: [[LOOP21]] = distinct !{[[LOOP21]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[LOOP22]] = distinct !{[[LOOP22]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[LOOP23]] = distinct !{[[LOOP23]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[LOOP24]] = distinct !{[[LOOP24]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[LOOP25]] = distinct !{[[LOOP25]], [[META9]], [[META10]]} +// AMDGCNSPIRV: [[LOOP26]] = distinct !{[[LOOP26]], [[META9]], [[META10]]} +//. 
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c index d27756259fa2f..7f427ca313ddc 100644 --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: webassembly-registered-target, asserts // FIXME: This should not be using -O2 and implicitly testing the entire IR opt pipeline. @@ -7,18 +7,20 @@ #include -// CHECK-LABEL: @test_v128_load( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2:![0-9]+]] // CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_v128_load(const void *mem) { return wasm_v128_load(mem); } -// CHECK-LABEL: @test_v128_load8_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load8_splat( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32> @@ -28,9 +30,10 @@ v128_t test_v128_load8_splat(const void *mem) { return wasm_v128_load8_splat(mem); } -// CHECK-LABEL: @test_v128_load16_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = 
load i16, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load16_splat( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32> @@ -40,9 +43,10 @@ v128_t test_v128_load16_splat(const void *mem) { return wasm_v128_load16_splat(mem); } -// CHECK-LABEL: @test_v128_load32_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_splat( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]] @@ -51,9 +55,10 @@ v128_t test_v128_load32_splat(const void *mem) { return wasm_v128_load32_splat(mem); } -// CHECK-LABEL: @test_v128_load64_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_splat( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 
[[TMP0]], i64 0 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32> @@ -63,9 +68,10 @@ v128_t test_v128_load64_splat(const void *mem) { return wasm_v128_load64_splat(mem); } -// CHECK-LABEL: @test_i16x8_load8x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_load8x8( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -74,9 +80,10 @@ v128_t test_i16x8_load8x8(const void *mem) { return wasm_i16x8_load8x8(mem); } -// CHECK-LABEL: @test_u16x8_load8x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_load8x8( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -85,9 +92,10 @@ v128_t test_u16x8_load8x8(const void *mem) { return wasm_u16x8_load8x8(mem); } -// CHECK-LABEL: @test_i32x4_load16x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden range(i32 -32768, 32768) <4 x i32> @test_i32x4_load16x4( +// CHECK-SAME: ptr noundef readonly 
captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] // @@ -95,9 +103,10 @@ v128_t test_i32x4_load16x4(const void *mem) { return wasm_i32x4_load16x4(mem); } -// CHECK-LABEL: @test_u32x4_load16x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden range(i32 0, 65536) <4 x i32> @test_u32x4_load16x4( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] // @@ -105,9 +114,10 @@ v128_t test_u32x4_load16x4(const void *mem) { return wasm_u32x4_load16x4(mem); } -// CHECK-LABEL: @test_i64x2_load32x2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_load32x2( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -116,9 +126,10 @@ v128_t test_i64x2_load32x2(const void *mem) { return wasm_i64x2_load32x2(mem); } -// CHECK-LABEL: @test_u64x2_load32x2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_load32x2( +// CHECK-SAME: 
ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -127,9 +138,10 @@ v128_t test_u64x2_load32x2(const void *mem) { return wasm_u64x2_load32x2(mem); } -// CHECK-LABEL: @test_v128_load32_zero( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load32_zero( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i64 0 // CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]] // @@ -137,9 +149,10 @@ v128_t test_v128_load32_zero(const void *mem) { return wasm_v128_load32_zero(mem); } -// CHECK-LABEL: @test_v128_load64_zero( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_zero( +// CHECK-SAME: ptr noundef readonly captures(none) [[MEM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -148,10 +161,11 @@ v128_t test_v128_load64_zero(const void *mem) { return wasm_v128_load64_zero(mem); } -// CHECK-LABEL: @test_v128_load8_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1, 
!tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load8_lane( +// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <16 x i8> // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i64 15 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -160,10 +174,11 @@ v128_t test_v128_load8_lane(const uint8_t *ptr, v128_t vec) { return wasm_v128_load8_lane(ptr, vec, 15); } -// CHECK-LABEL: @test_v128_load16_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load16_lane( +// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <8 x i16> // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i64 7 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -172,20 +187,22 @@ v128_t test_v128_load16_lane(const uint16_t *ptr, v128_t vec) { return wasm_v128_load16_lane(ptr, vec, 7); } -// CHECK-LABEL: @test_v128_load32_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC:%.*]], i32 [[TMP0]], i64 3 +// CHECK-LABEL: define hidden <4 x 
i32> @test_v128_load32_lane( +// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC]], i32 [[TMP0]], i64 3 // CHECK-NEXT: ret <4 x i32> [[VECINS_I]] // v128_t test_v128_load32_lane(const uint32_t *ptr, v128_t vec) { return wasm_v128_load32_lane(ptr, vec, 3); } -// CHECK-LABEL: @test_v128_load64_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden <4 x i32> @test_v128_load64_lane( +// CHECK-SAME: ptr noundef readonly captures(none) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC]] to <2 x i64> // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i64 1 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -194,76 +211,82 @@ v128_t test_v128_load64_lane(const uint64_t *ptr, v128_t vec) { return wasm_v128_load64_lane(ptr, vec, 1); } -// CHECK-LABEL: @test_v128_store( -// CHECK-NEXT: entry: -// CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden void @test_v128_store( +// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 16)) [[MEM:%.*]], <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: store <4 x i32> [[A]], ptr [[MEM]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // void test_v128_store(void *mem, v128_t a) { wasm_v128_store(mem, 
a); } -// CHECK-LABEL: @test_v128_store8_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden void @test_v128_store8_lane( +// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 1)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <16 x i8> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15 -// CHECK-NEXT: store i8 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: store i8 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // void test_v128_store8_lane(uint8_t *ptr, v128_t vec) { wasm_v128_store8_lane(ptr, vec, 15); } -// CHECK-LABEL: @test_v128_store16_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden void @test_v128_store16_lane( +// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 2)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <8 x i16> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7 -// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // void test_v128_store16_lane(uint16_t *ptr, v128_t vec) { wasm_v128_store16_lane(ptr, vec, 7); } -// CHECK-LABEL: @test_v128_store32_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC:%.*]], i64 3 -// CHECK-NEXT: store i32 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-LABEL: define hidden void @test_v128_store32_lane( +// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 4)) [[PTR:%.*]], 
<4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC]], i64 3 +// CHECK-NEXT: store i32 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // void test_v128_store32_lane(uint32_t *ptr, v128_t vec) { wasm_v128_store32_lane(ptr, vec, 3); } -// CHECK-LABEL: @test_v128_store64_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden void @test_v128_store64_lane( +// CHECK-SAME: ptr noundef writeonly captures(none) initializes((0, 8)) [[PTR:%.*]], <4 x i32> noundef [[VEC:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC]] to <2 x i64> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 -// CHECK-NEXT: store i64 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: store i64 [[VECEXT_I]], ptr [[PTR]], align 1, !tbaa [[CHAR_TBAA2]] // CHECK-NEXT: ret void // void test_v128_store64_lane(uint64_t *ptr, v128_t vec) { wasm_v128_store64_lane(ptr, vec, 1); } -// CHECK-LABEL: @test_i8x16_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1 -// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2 -// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3 -// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4 -// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5 -// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6 -// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 
[[C7:%.*]], i64 7 -// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8 -// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9 -// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10 -// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11 -// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12 -// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13 -// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14 -// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15 +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_make( +// CHECK-SAME: i8 noundef signext [[C0:%.*]], i8 noundef signext [[C1:%.*]], i8 noundef signext [[C2:%.*]], i8 noundef signext [[C3:%.*]], i8 noundef signext [[C4:%.*]], i8 noundef signext [[C5:%.*]], i8 noundef signext [[C6:%.*]], i8 noundef signext [[C7:%.*]], i8 noundef signext [[C8:%.*]], i8 noundef signext [[C9:%.*]], i8 noundef signext [[C10:%.*]], i8 noundef signext [[C11:%.*]], i8 noundef signext [[C12:%.*]], i8 noundef signext [[C13:%.*]], i8 noundef signext [[C14:%.*]], i8 noundef signext [[C15:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1]], i64 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2]], i64 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3]], i64 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4]], i64 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> 
[[VECINIT4_I]], i8 [[C5]], i64 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6]], i64 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7]], i64 7 +// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8]], i64 8 +// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9]], i64 9 +// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10]], i64 10 +// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11]], i64 11 +// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12]], i64 12 +// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13]], i64 13 +// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14]], i64 14 +// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15]], i64 15 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -271,24 +294,25 @@ v128_t test_i8x16_make(int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, in return wasm_i8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15); } -// CHECK-LABEL: @test_u8x16_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1 -// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2 -// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3 -// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4 -// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5 -// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement 
<16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6 -// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i64 7 -// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8 -// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9 -// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10 -// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11 -// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12 -// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13 -// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14 -// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15 +// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_make( +// CHECK-SAME: i8 noundef zeroext [[C0:%.*]], i8 noundef zeroext [[C1:%.*]], i8 noundef zeroext [[C2:%.*]], i8 noundef zeroext [[C3:%.*]], i8 noundef zeroext [[C4:%.*]], i8 noundef zeroext [[C5:%.*]], i8 noundef zeroext [[C6:%.*]], i8 noundef zeroext [[C7:%.*]], i8 noundef zeroext [[C8:%.*]], i8 noundef zeroext [[C9:%.*]], i8 noundef zeroext [[C10:%.*]], i8 noundef zeroext [[C11:%.*]], i8 noundef zeroext [[C12:%.*]], i8 noundef zeroext [[C13:%.*]], i8 noundef zeroext [[C14:%.*]], i8 noundef zeroext [[C15:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1]], i64 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2]], i64 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3]], i64 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = 
insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4]], i64 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5]], i64 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6]], i64 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7]], i64 7 +// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8]], i64 8 +// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9]], i64 9 +// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10]], i64 10 +// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11]], i64 11 +// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12]], i64 12 +// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13]], i64 13 +// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14]], i64 14 +// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15]], i64 15 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -296,16 +320,17 @@ v128_t test_u8x16_make(uint8_t c0, uint8_t c1, uint8_t c2, uint8_t c3, uint8_t c return wasm_u8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15); } -// CHECK-LABEL: @test_i16x8_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1 -// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2 -// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3 -// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4 -// CHECK-NEXT: 
[[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5 -// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6 -// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7 +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_make( +// CHECK-SAME: i16 noundef signext [[C0:%.*]], i16 noundef signext [[C1:%.*]], i16 noundef signext [[C2:%.*]], i16 noundef signext [[C3:%.*]], i16 noundef signext [[C4:%.*]], i16 noundef signext [[C5:%.*]], i16 noundef signext [[C6:%.*]], i16 noundef signext [[C7:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1]], i64 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2]], i64 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3]], i64 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4]], i64 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5]], i64 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6]], i64 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7]], i64 7 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -313,16 +338,17 @@ v128_t test_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7); } -// CHECK-LABEL: @test_u16x8_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1 -// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement 
<8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2 -// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3 -// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4 -// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5 -// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6 -// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7 +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_make( +// CHECK-SAME: i16 noundef zeroext [[C0:%.*]], i16 noundef zeroext [[C1:%.*]], i16 noundef zeroext [[C2:%.*]], i16 noundef zeroext [[C3:%.*]], i16 noundef zeroext [[C4:%.*]], i16 noundef zeroext [[C5:%.*]], i16 noundef zeroext [[C6:%.*]], i16 noundef zeroext [[C7:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1]], i64 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2]], i64 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3]], i64 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4]], i64 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5]], i64 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6]], i64 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7]], i64 7 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -330,34 +356,37 @@ v128_t test_u16x8_make(uint16_t c0, uint16_t c1, uint16_t c2, uint16_t c3, uint1 return wasm_u16x8_make(c0, c1, c2, c3, c4, c5, c6, c7); } -// CHECK-LABEL: @test_i32x4_make( 
-// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1 -// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2 -// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3 +// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_make( +// CHECK-SAME: i32 noundef [[C0:%.*]], i32 noundef [[C1:%.*]], i32 noundef [[C2:%.*]], i32 noundef [[C3:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1]], i64 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2]], i64 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3]], i64 3 // CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]] // v128_t test_i32x4_make(int32_t c0, int32_t c1, int32_t c2, int32_t c3) { return wasm_i32x4_make(c0, c1, c2, c3); } -// CHECK-LABEL: @test_u32x4_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1 -// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2 -// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3 +// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_make( +// CHECK-SAME: i32 noundef [[C0:%.*]], i32 noundef [[C1:%.*]], i32 noundef [[C2:%.*]], i32 noundef [[C3:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> 
[[VECINIT_I]], i32 [[C1]], i64 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2]], i64 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3]], i64 3 // CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]] // v128_t test_u32x4_make(uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3) { return wasm_u32x4_make(c0, c1, c2, c3); } -// CHECK-LABEL: @test_i64x2_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1 +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_make( +// CHECK-SAME: i64 noundef [[C0:%.*]], i64 noundef [[C1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1]], i64 1 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -365,10 +394,11 @@ v128_t test_i64x2_make(int64_t c0, int64_t c1) { return wasm_i64x2_make(c0, c1); } -// CHECK-LABEL: @test_u64x2_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1 +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_make( +// CHECK-SAME: i64 noundef [[C0:%.*]], i64 noundef [[C1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1]], i64 1 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -376,12 +406,13 @@ v128_t 
test_u64x2_make(uint64_t c0, uint64_t c1) { return wasm_u64x2_make(c0, c1); } -// CHECK-LABEL: @test_f32x4_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1:%.*]], i64 1 -// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2:%.*]], i64 2 -// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3:%.*]], i64 3 +// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_make( +// CHECK-SAME: float noundef [[C0:%.*]], float noundef [[C1:%.*]], float noundef [[C2:%.*]], float noundef [[C3:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1]], i64 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2]], i64 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3]], i64 3 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -389,10 +420,11 @@ v128_t test_f32x4_make(float c0, float c1, float c2, float c3) { return wasm_f32x4_make(c0, c1, c2, c3); } -// CHECK-LABEL: @test_f64x2_make( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C0:%.*]], i64 0 -// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1:%.*]], i64 1 +// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_make( +// CHECK-SAME: double noundef [[C0:%.*]], double noundef [[C1:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C0]], i64 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = 
insertelement <2 x double> [[VECINIT_I]], double [[C1]], i64 1 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -400,169 +432,190 @@ v128_t test_f64x2_make(double c0, double c1) { return wasm_f64x2_make(c0, c1); } -// CHECK-LABEL: @test_i8x16_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 50462976, 252579085) <4 x i32> @test_i8x16_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_i8x16_const(void) { return wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -// CHECK-LABEL: @test_u8x16_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 50462976, 252579085) <4 x i32> @test_u8x16_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_u8x16_const(void) { return wasm_u8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -// CHECK-LABEL: @test_i16x8_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 65536, 458759) <4 x i32> @test_i16x8_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_i16x8_const(void) { return wasm_i16x8_const(0, 1, 2, 3, 4, 5, 6, 7); } -// CHECK-LABEL: @test_u16x8_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 65536, 458759) <4 x i32> @test_u16x8_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_u16x8_const(void) { return wasm_u16x8_const(0, 1, 2, 3, 4, 5, 6, 7); } -// CHECK-LABEL: @test_i32x4_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 4) <4 x i32> @test_i32x4_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> 
// v128_t test_i32x4_const(void) { return wasm_i32x4_const(0, 1, 2, 3); } -// CHECK-LABEL: @test_u32x4_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 4) <4 x i32> @test_u32x4_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_u32x4_const(void) { return wasm_u32x4_const(0, 1, 2, 3); } -// CHECK-LABEL: @test_i64x2_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 2) <4 x i32> @test_i64x2_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_i64x2_const(void) { return wasm_i64x2_const(0, 1); } -// CHECK-LABEL: @test_u64x2_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 2) <4 x i32> @test_u64x2_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_u64x2_const(void) { return wasm_u64x2_const(0, 1); } -// CHECK-LABEL: @test_f32x4_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 1077936129) <4 x i32> @test_f32x4_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_f32x4_const(void) { return wasm_f32x4_const(0, 1, 2, 3); } -// CHECK-LABEL: @test_f64x2_const( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 1072693249) <4 x i32> @test_f64x2_const( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_f64x2_const(void) { return wasm_f64x2_const(0, 1); } -// CHECK-LABEL: @test_i8x16_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> splat (i32 707406378) // v128_t 
test_i8x16_const_splat(void) { return wasm_i8x16_const_splat(42); } -// CHECK-LABEL: @test_u8x16_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> splat (i32 707406378) // v128_t test_u8x16_const_splat(void) { return wasm_u8x16_const_splat(42); } -// CHECK-LABEL: @test_i16x8_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> splat (i32 2752554) // v128_t test_i16x8_const_splat(void) { return wasm_i16x8_const_splat(42); } -// CHECK-LABEL: @test_u16x8_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> splat (i32 2752554) // v128_t test_u16x8_const_splat(void) { return wasm_u16x8_const_splat(42); } -// CHECK-LABEL: @test_i32x4_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> splat (i32 42) // v128_t test_i32x4_const_splat(void) { return wasm_i32x4_const_splat(42); } -// CHECK-LABEL: @test_u32x4_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> splat (i32 42) // v128_t test_u32x4_const_splat(void) { return wasm_u32x4_const_splat(42); } -// CHECK-LABEL: @test_i64x2_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 43) <4 x i32> @test_i64x2_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_i64x2_const_splat(void) { return wasm_i64x2_const_splat(42); } -// CHECK-LABEL: @test_u64x2_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 43) <4 x i32> @test_u64x2_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_u64x2_const_splat(void) { return wasm_u64x2_const_splat(42); } -// CHECK-LABEL: @test_f32x4_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> splat (i32 1109917696) // v128_t test_f32x4_const_splat(void) { return wasm_f32x4_const_splat(42); } -// CHECK-LABEL: @test_f64x2_const_splat( -// CHECK-NEXT: entry: +// CHECK-LABEL: define hidden noundef range(i32 0, 1078263809) <4 x i32> @test_f64x2_const_splat( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret <4 x i32> // v128_t test_f64x2_const_splat(void) { return wasm_f64x2_const_splat(42); } -// CHECK-LABEL: @test_i8x16_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_splat( +// CHECK-SAME: i8 noundef signext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0 // CHECK-NEXT: [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -571,9 +624,10 @@ v128_t test_i8x16_splat(int8_t a) { return wasm_i8x16_splat(a); } -// CHECK-LABEL: @test_u8x16_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = 
insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_splat( +// CHECK-SAME: i8 noundef zeroext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0 // CHECK-NEXT: [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -582,9 +636,10 @@ v128_t test_u8x16_splat(uint8_t a) { return wasm_u8x16_splat(a); } -// CHECK-LABEL: @test_i8x16_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef signext i8 @test_i8x16_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15 // CHECK-NEXT: ret i8 [[VECEXT_I]] // @@ -592,9 +647,10 @@ int8_t test_i8x16_extract_lane(v128_t a) { return wasm_i8x16_extract_lane(a, 15); } -// CHECK-LABEL: @test_u8x16_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef zeroext i8 @test_u8x16_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15 // CHECK-NEXT: ret i8 [[VECEXT_I]] // @@ -602,10 +658,11 @@ uint8_t test_u8x16_extract_lane(v128_t a) { return wasm_u8x16_extract_lane(a, 15); } -// CHECK-LABEL: @test_i8x16_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: 
[[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B]], i64 15 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -613,10 +670,11 @@ v128_t test_i8x16_replace_lane(v128_t a, int8_t b) { return wasm_i8x16_replace_lane(a, 15, b); } -// CHECK-LABEL: @test_u8x16_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B]], i64 15 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -624,9 +682,10 @@ v128_t test_u8x16_replace_lane(v128_t a, uint8_t b) { return wasm_u8x16_replace_lane(a, 15, b); } -// CHECK-LABEL: @test_i16x8_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_splat( +// CHECK-SAME: i16 noundef signext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0 // CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 
x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -635,9 +694,10 @@ v128_t test_i16x8_splat(int16_t a) { return wasm_i16x8_splat(a); } -// CHECK-LABEL: @test_u16x8_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_splat( +// CHECK-SAME: i16 noundef zeroext [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0 // CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -646,9 +706,10 @@ v128_t test_u16x8_splat(uint16_t a) { return wasm_u16x8_splat(a); } -// CHECK-LABEL: @test_i16x8_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef signext i16 @test_i16x8_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7 // CHECK-NEXT: ret i16 [[VECEXT_I]] // @@ -656,9 +717,10 @@ int16_t test_i16x8_extract_lane(v128_t a) { return wasm_i16x8_extract_lane(a, 7); } -// CHECK-LABEL: @test_u16x8_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef zeroext i16 @test_u16x8_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 
x i16> [[TMP0]], i64 7 // CHECK-NEXT: ret i16 [[VECEXT_I]] // @@ -666,10 +728,11 @@ uint16_t test_u16x8_extract_lane(v128_t a) { return wasm_u16x8_extract_lane(a, 7); } -// CHECK-LABEL: @test_i16x8_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B]], i64 7 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -677,10 +740,11 @@ v128_t test_i16x8_replace_lane(v128_t a, int16_t b) { return wasm_i16x8_replace_lane(a, 7, b); } -// CHECK-LABEL: @test_u16x8_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B]], i64 7 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -688,9 +752,10 @@ v128_t test_u16x8_replace_lane(v128_t a, uint16_t b) { return wasm_u16x8_replace_lane(a, 7, b); } -// CHECK-LABEL: @test_i32x4_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0 +// CHECK-LABEL: define 
hidden <4 x i32> @test_i32x4_splat( +// CHECK-SAME: i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 // CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]] // @@ -698,9 +763,10 @@ v128_t test_i32x4_splat(int32_t a) { return wasm_i32x4_splat(a); } -// CHECK-LABEL: @test_u32x4_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_splat( +// CHECK-SAME: i32 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 // CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]] // @@ -708,45 +774,50 @@ v128_t test_u32x4_splat(uint32_t a) { return wasm_u32x4_splat(a); } -// CHECK-LABEL: @test_i32x4_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3 +// CHECK-LABEL: define hidden noundef i32 @test_i32x4_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A]], i64 3 // CHECK-NEXT: ret i32 [[VECEXT_I]] // int32_t test_i32x4_extract_lane(v128_t a) { return wasm_i32x4_extract_lane(a, 3); } -// CHECK-LABEL: @test_u32x4_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3 +// CHECK-LABEL: define hidden noundef i32 @test_u32x4_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = 
extractelement <4 x i32> [[A]], i64 3 // CHECK-NEXT: ret i32 [[VECEXT_I]] // uint32_t test_u32x4_extract_lane(v128_t a) { return wasm_u32x4_extract_lane(a, 3); } -// CHECK-LABEL: @test_i32x4_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A]], i32 [[B]], i64 3 // CHECK-NEXT: ret <4 x i32> [[VECINS_I]] // v128_t test_i32x4_replace_lane(v128_t a, int32_t b) { return wasm_i32x4_replace_lane(a, 3, b); } -// CHECK-LABEL: @test_u32x4_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A]], i32 [[B]], i64 3 // CHECK-NEXT: ret <4 x i32> [[VECINS_I]] // v128_t test_u32x4_replace_lane(v128_t a, uint32_t b) { return wasm_u32x4_replace_lane(a, 3, b); } -// CHECK-LABEL: @test_i64x2_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_splat( +// CHECK-SAME: i64 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 // CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -755,9 +826,10 @@ v128_t 
test_i64x2_splat(int64_t a) { return wasm_i64x2_splat(a); } -// CHECK-LABEL: @test_u64x2_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_splat( +// CHECK-SAME: i64 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 // CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -766,9 +838,10 @@ v128_t test_u64x2_splat(uint64_t a) { return wasm_u64x2_splat(a); } -// CHECK-LABEL: @test_i64x2_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef i64 @test_i64x2_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 // CHECK-NEXT: ret i64 [[VECEXT_I]] // @@ -776,9 +849,10 @@ int64_t test_i64x2_extract_lane(v128_t a) { return wasm_i64x2_extract_lane(a, 1); } -// CHECK-LABEL: @test_u64x2_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef i64 @test_u64x2_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 // CHECK-NEXT: ret i64 [[VECEXT_I]] // @@ -786,10 +860,11 @@ uint64_t test_u64x2_extract_lane(v128_t a) { return wasm_u64x2_extract_lane(a, 1); } -// CHECK-LABEL: 
@test_i64x2_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -797,10 +872,11 @@ v128_t test_i64x2_replace_lane(v128_t a, int64_t b) { return wasm_i64x2_replace_lane(a, 1, b); } -// CHECK-LABEL: @test_u64x2_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u64x2_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -808,9 +884,10 @@ v128_t test_u64x2_replace_lane(v128_t a, uint64_t b) { return wasm_u64x2_replace_lane(a, 1, b); } -// CHECK-LABEL: @test_f32x4_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_splat( +// CHECK-SAME: float noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x 
float> poison, float [[A]], i64 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT_I]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -819,9 +896,10 @@ v128_t test_f32x4_splat(float a) { return wasm_f32x4_splat(a); } -// CHECK-LABEL: @test_f32x4_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef float @test_f32x4_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 // CHECK-NEXT: ret float [[VECEXT_I]] // @@ -829,10 +907,11 @@ float test_f32x4_extract_lane(v128_t a) { return wasm_f32x4_extract_lane(a, 3); } -// CHECK-LABEL: @test_f32x4_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i64 3 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B]], i64 3 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -840,9 +919,10 @@ v128_t test_f32x4_replace_lane(v128_t a, float b) { return wasm_f32x4_replace_lane(a, 3, b); } -// CHECK-LABEL: @test_f64x2_splat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +// CHECK-LABEL: define hidden <4 x i32> 
@test_f64x2_splat( +// CHECK-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[A]], i64 0 // CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x double> [[VECINIT_I]], <2 x double> poison, <2 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -851,9 +931,10 @@ v128_t test_f64x2_splat(double a) { return wasm_f64x2_splat(a); } -// CHECK-LABEL: @test_f64x2_extract_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef double @test_f64x2_extract_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i64 1 // CHECK-NEXT: ret double [[VECEXT_I]] // @@ -861,10 +942,11 @@ double test_f64x2_extract_lane(v128_t a) { return wasm_f64x2_extract_lane(a, 1); } -// CHECK-LABEL: @test_f64x2_replace_lane( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B:%.*]], i64 1 +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_replace_lane( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[VECINS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -872,10 +954,11 @@ v128_t test_f64x2_replace_lane(v128_t a, double b) { return wasm_f64x2_replace_lane(a, 1, 
b); } -// CHECK-LABEL: @test_i8x16_eq( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_eq( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -885,10 +968,11 @@ v128_t test_i8x16_eq(v128_t a, v128_t b) { return wasm_i8x16_eq(a, b); } -// CHECK-LABEL: @test_i8x16_ne( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_ne( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -898,10 +982,11 @@ v128_t test_i8x16_ne(v128_t a, v128_t b) { return wasm_i8x16_ne(a, b); } -// CHECK-LABEL: @test_i8x16_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], 
<4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -911,10 +996,11 @@ v128_t test_i8x16_lt(v128_t a, v128_t b) { return wasm_i8x16_lt(a, b); } -// CHECK-LABEL: @test_u8x16_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -924,10 +1010,11 @@ v128_t test_u8x16_lt(v128_t a, v128_t b) { return wasm_u8x16_lt(a, b); } -// CHECK-LABEL: @test_i8x16_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: 
[[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -937,10 +1024,11 @@ v128_t test_i8x16_gt(v128_t a, v128_t b) { return wasm_i8x16_gt(a, b); } -// CHECK-LABEL: @test_u8x16_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -950,10 +1038,11 @@ v128_t test_u8x16_gt(v128_t a, v128_t b) { return wasm_u8x16_gt(a, b); } -// CHECK-LABEL: @test_i8x16_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -963,10 +1052,11 @@ v128_t test_i8x16_le(v128_t a, v128_t b) { return wasm_i8x16_le(a, b); } -// CHECK-LABEL: @test_u8x16_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -976,10 +1066,11 @@ v128_t test_u8x16_le(v128_t a, v128_t b) { return wasm_u8x16_le(a, b); } -// CHECK-LABEL: @test_i8x16_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -989,10 +1080,11 @@ v128_t test_i8x16_ge(v128_t a, v128_t b) { return wasm_i8x16_ge(a, b); } -// CHECK-LABEL: @test_u8x16_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32> @@ -1002,10 +1094,11 @@ v128_t test_u8x16_ge(v128_t a, v128_t b) { return wasm_u8x16_ge(a, b); } -// CHECK-LABEL: @test_i16x8_eq( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_eq( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1015,10 +1108,11 @@ v128_t test_i16x8_eq(v128_t a, v128_t b) { return wasm_i16x8_eq(a, b); } -// CHECK-LABEL: @test_i16x8_ne( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_ne( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: 
[[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1028,10 +1122,11 @@ v128_t test_i16x8_ne(v128_t a, v128_t b) { return wasm_i16x8_ne(a, b); } -// CHECK-LABEL: @test_i16x8_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1041,10 +1136,11 @@ v128_t test_i16x8_lt(v128_t a, v128_t b) { return wasm_i16x8_lt(a, b); } -// CHECK-LABEL: @test_u16x8_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1054,10 +1150,11 @@ v128_t test_u16x8_lt(v128_t a, v128_t b) { return wasm_u16x8_lt(a, b); } -// CHECK-LABEL: @test_i16x8_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] 
= bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1067,10 +1164,11 @@ v128_t test_i16x8_gt(v128_t a, v128_t b) { return wasm_i16x8_gt(a, b); } -// CHECK-LABEL: @test_u16x8_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1080,10 +1178,11 @@ v128_t test_u16x8_gt(v128_t a, v128_t b) { return wasm_u16x8_gt(a, b); } -// CHECK-LABEL: @test_i16x8_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> 
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1093,10 +1192,11 @@ v128_t test_i16x8_le(v128_t a, v128_t b) { return wasm_i16x8_le(a, b); } -// CHECK-LABEL: @test_u16x8_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1106,10 +1206,11 @@ v128_t test_u16x8_le(v128_t a, v128_t b) { return wasm_u16x8_le(a, b); } -// CHECK-LABEL: @test_i16x8_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1119,10 +1220,11 @@ v128_t 
test_i16x8_ge(v128_t a, v128_t b) { return wasm_i16x8_ge(a, b); } -// CHECK-LABEL: @test_u16x8_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32> @@ -1132,9 +1234,10 @@ v128_t test_u16x8_ge(v128_t a, v128_t b) { return wasm_u16x8_ge(a, b); } -// CHECK-LABEL: @test_i32x4_eq( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_eq( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1142,9 +1245,10 @@ v128_t test_i32x4_eq(v128_t a, v128_t b) { return wasm_i32x4_eq(a, b); } -// CHECK-LABEL: @test_i32x4_ne( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ne( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // 
CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1152,9 +1256,10 @@ v128_t test_i32x4_ne(v128_t a, v128_t b) { return wasm_i32x4_ne(a, b); } -// CHECK-LABEL: @test_i32x4_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1162,9 +1267,10 @@ v128_t test_i32x4_lt(v128_t a, v128_t b) { return wasm_i32x4_lt(a, b); } -// CHECK-LABEL: @test_u32x4_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1172,9 +1278,10 @@ v128_t test_u32x4_lt(v128_t a, v128_t b) { return wasm_u32x4_lt(a, b); } -// CHECK-LABEL: @test_i32x4_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1182,9 +1289,10 @@ v128_t test_i32x4_gt(v128_t a, v128_t b) { return wasm_i32x4_gt(a, b); } -// CHECK-LABEL: @test_u32x4_gt( 
-// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1192,9 +1300,10 @@ v128_t test_u32x4_gt(v128_t a, v128_t b) { return wasm_u32x4_gt(a, b); } -// CHECK-LABEL: @test_i32x4_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1202,9 +1311,10 @@ v128_t test_i32x4_le(v128_t a, v128_t b) { return wasm_i32x4_le(a, b); } -// CHECK-LABEL: @test_u32x4_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1212,9 +1322,10 @@ v128_t test_u32x4_le(v128_t a, v128_t b) { return wasm_u32x4_le(a, b); } -// CHECK-LABEL: @test_i32x4_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ge( 
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1222,9 +1333,10 @@ v128_t test_i32x4_ge(v128_t a, v128_t b) { return wasm_i32x4_ge(a, b); } -// CHECK-LABEL: @test_u32x4_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A]], [[B]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] // @@ -1232,10 +1344,11 @@ v128_t test_u32x4_ge(v128_t a, v128_t b) { return wasm_u32x4_ge(a, b); } -// CHECK-LABEL: @test_i64x2_eq( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_eq( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1245,10 +1358,11 @@ v128_t test_i64x2_eq(v128_t a, v128_t b) { return wasm_i64x2_eq(a, b); } -// CHECK-LABEL: @test_i64x2_ne( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_ne( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <2 x i64> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1258,10 +1372,11 @@ v128_t test_i64x2_ne(v128_t a, v128_t b) { return wasm_i64x2_ne(a, b); } -// CHECK-LABEL: @test_i64x2_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i64> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1271,10 +1386,11 @@ v128_t test_i64x2_lt(v128_t a, v128_t b) { return wasm_i64x2_lt(a, b); } -// CHECK-LABEL: @test_i64x2_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x 
i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i64> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1284,10 +1400,11 @@ v128_t test_i64x2_gt(v128_t a, v128_t b) { return wasm_i64x2_gt(a, b); } -// CHECK-LABEL: @test_i64x2_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <2 x i64> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1297,10 +1414,11 @@ v128_t test_i64x2_le(v128_t a, v128_t b) { return wasm_i64x2_le(a, b); } -// CHECK-LABEL: @test_i64x2_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <2 x i64> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ 
-1310,10 +1428,11 @@ v128_t test_i64x2_ge(v128_t a, v128_t b) { return wasm_i64x2_ge(a, b); } -// CHECK-LABEL: @test_f32x4_eq( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_eq( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] @@ -1322,10 +1441,11 @@ v128_t test_f32x4_eq(v128_t a, v128_t b) { return wasm_f32x4_eq(a, b); } -// CHECK-LABEL: @test_f32x4_ne( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ne( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] @@ -1334,10 +1454,11 @@ v128_t test_f32x4_ne(v128_t a, v128_t b) { return wasm_f32x4_ne(a, b); } -// CHECK-LABEL: @test_f32x4_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden 
range(i32 -1, 1) <4 x i32> @test_f32x4_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] @@ -1346,10 +1467,11 @@ v128_t test_f32x4_lt(v128_t a, v128_t b) { return wasm_f32x4_lt(a, b); } -// CHECK-LABEL: @test_f32x4_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] @@ -1358,10 +1480,11 @@ v128_t test_f32x4_gt(v128_t a, v128_t b) { return wasm_f32x4_gt(a, b); } -// CHECK-LABEL: @test_f32x4_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: 
[[CMP_I:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] @@ -1370,10 +1493,11 @@ v128_t test_f32x4_le(v128_t a, v128_t b) { return wasm_f32x4_le(a, b); } -// CHECK-LABEL: @test_f32x4_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[SEXT_I]] @@ -1382,10 +1506,11 @@ v128_t test_f32x4_ge(v128_t a, v128_t b) { return wasm_f32x4_ge(a, b); } -// CHECK-LABEL: @test_f64x2_eq( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_eq( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1395,10 +1520,11 @@ v128_t test_f64x2_eq(v128_t a, v128_t b) { return wasm_f64x2_eq(a, b); } -// CHECK-LABEL: @test_f64x2_ne( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_ne( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1408,10 +1534,11 @@ v128_t test_f64x2_ne(v128_t a, v128_t b) { return wasm_f64x2_ne(a, b); } -// CHECK-LABEL: @test_f64x2_lt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_lt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1421,10 +1548,11 @@ v128_t test_f64x2_lt(v128_t a, v128_t b) { return wasm_f64x2_lt(a, b); } -// CHECK-LABEL: @test_f64x2_gt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_gt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 
x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1434,10 +1562,11 @@ v128_t test_f64x2_gt(v128_t a, v128_t b) { return wasm_f64x2_gt(a, b); } -// CHECK-LABEL: @test_f64x2_le( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_le( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1447,10 +1576,11 @@ v128_t test_f64x2_le(v128_t a, v128_t b) { return wasm_f64x2_le(a, b); } -// CHECK-LABEL: @test_f64x2_ge( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_ge( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x 
double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32> @@ -1460,55 +1590,61 @@ v128_t test_f64x2_ge(v128_t a, v128_t b) { return wasm_f64x2_ge(a, b); } -// CHECK-LABEL: @test_v128_not( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) +// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_not( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: ret <4 x i32> [[NOT_I]] // v128_t test_v128_not(v128_t a) { return wasm_v128_not(a); } -// CHECK-LABEL: @test_v128_and( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[B:%.*]], [[A:%.*]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_and( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[B]], [[A]] // CHECK-NEXT: ret <4 x i32> [[AND_I]] // v128_t test_v128_and(v128_t a, v128_t b) { return wasm_v128_and(a, b); } -// CHECK-LABEL: @test_v128_or( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[B:%.*]], [[A:%.*]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_or( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[B]], [[A]] // CHECK-NEXT: ret <4 x i32> [[OR_I]] // v128_t test_v128_or(v128_t a, v128_t b) { return wasm_v128_or(a, b); } -// CHECK-LABEL: @test_v128_xor( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[B:%.*]], [[A:%.*]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_xor( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[B]], [[A]] // CHECK-NEXT: ret <4 x i32> [[XOR_I]] // v128_t test_v128_xor(v128_t a, v128_t b) { return wasm_v128_xor(a, b); } -// CHECK-LABEL: @test_v128_andnot( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1) -// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A:%.*]], [[NOT_I]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_v128_andnot( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B]], splat (i32 -1) +// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A]], [[NOT_I]] // CHECK-NEXT: ret <4 x i32> [[AND_I]] // v128_t test_v128_andnot(v128_t a, v128_t b) { return wasm_v128_andnot(a, b); } -// CHECK-LABEL: @test_v128_any_true( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden zeroext i1 @test_v128_any_true( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK-NEXT: ret i1 [[TOBOOL_I]] @@ -1517,18 +1653,20 @@ bool test_v128_any_true(v128_t a) { return wasm_v128_any_true(a); } -// CHECK-LABEL: @test_v128_bitselect( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) +// CHECK-LABEL: define hidden <4 x i32> @test_v128_bitselect( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[MASK:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> [[MASK]]) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) { return wasm_v128_bitselect(a, b, mask); } -// CHECK-LABEL: @test_i8x16_abs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_abs( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[ABS_I:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[ABS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -1537,9 +1675,10 @@ v128_t test_i8x16_abs(v128_t a) { return wasm_i8x16_abs(a); } -// CHECK-LABEL: @test_i8x16_neg( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_neg( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[TMP0]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -1548,9 +1687,10 @@ v128_t test_i8x16_neg(v128_t a) { return wasm_i8x16_neg(a); } -// CHECK-LABEL: @test_i8x16_all_true( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden zeroext i1 @test_i8x16_all_true( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 
@llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK-NEXT: ret i1 [[TOBOOL_I]] @@ -1559,9 +1699,10 @@ bool test_i8x16_all_true(v128_t a) { return wasm_i8x16_all_true(a); } -// CHECK-LABEL: @test_i8x16_bitmask( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden i32 @test_i8x16_bitmask( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -1569,21 +1710,23 @@ uint32_t test_i8x16_bitmask(v128_t a) { return wasm_i8x16_bitmask(a); } -// CHECK-LABEL: @test_i8x16_popcnt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = tail call range(i8 0, 9) <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_popcnt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[ELT_CTPOP_I:%.*]] = tail call range(i8 0, 9) <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[ELT_CTPOP_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // v128_t test_i8x16_popcnt(v128_t a) { return wasm_i8x16_popcnt(a); } -// CHECK-LABEL: @test_i8x16_shl( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8 +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_shl( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef 
[[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i8 // CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7 // CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer @@ -1595,10 +1738,11 @@ v128_t test_i8x16_shl(v128_t a, uint32_t b) { return wasm_i8x16_shl(a, b); } -// CHECK-LABEL: @test_i8x16_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8 +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i8 // CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7 // CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer @@ -1610,10 +1754,11 @@ v128_t test_i8x16_shr(v128_t a, uint32_t b) { return wasm_i8x16_shr(a, b); } -// CHECK-LABEL: @test_u8x16_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8 +// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i8 // CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7 // CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], 
i64 0 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer @@ -1625,10 +1770,11 @@ v128_t test_u8x16_shr(v128_t a, uint32_t b) { return wasm_u8x16_shr(a, b); } -// CHECK-LABEL: @test_i8x16_add( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_add( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ADD_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -1637,34 +1783,37 @@ v128_t test_i8x16_add(v128_t a, v128_t b) { return wasm_i8x16_add(a, b); } -// CHECK-LABEL: @test_i8x16_add_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_add_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32> +// 
CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i8x16_add_sat(v128_t a, v128_t b) { return wasm_i8x16_add_sat(a, b); } -// CHECK-LABEL: @test_u8x16_add_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_add_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u8x16_add_sat(v128_t a, v128_t b) { return wasm_u8x16_add_sat(a, b); } -// CHECK-LABEL: @test_i8x16_sub( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_sub( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -1673,82 +1822,89 @@ v128_t test_i8x16_sub(v128_t a, v128_t b) { return 
wasm_i8x16_sub(a, b); } -// CHECK-LABEL: @test_i8x16_sub_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_sub_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i8x16_sub_sat(v128_t a, v128_t b) { return wasm_i8x16_sub_sat(a, b); } -// CHECK-LABEL: @test_u8x16_sub_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_sub_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) 
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_SAT_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u8x16_sub_sat(v128_t a, v128_t b) { return wasm_u8x16_sub_sat(a, b); } -// CHECK-LABEL: @test_i8x16_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MIN_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i8x16_min(v128_t a, v128_t b) { return wasm_i8x16_min(a, b); } -// CHECK-LABEL: @test_u8x16_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x 
i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MIN_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u8x16_min(v128_t a, v128_t b) { return wasm_u8x16_min(a, b); } -// CHECK-LABEL: @test_i8x16_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i8x16_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MAX_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i8x16_max(v128_t a, v128_t b) { return wasm_i8x16_max(a, b); } -// CHECK-LABEL: @test_u8x16_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u8x16_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> +// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ELT_MAX_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u8x16_max(v128_t a, v128_t b) { return wasm_u8x16_max(a, b); } -// CHECK-LABEL: @test_u8x16_avgr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_avgr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -1757,9 +1913,10 @@ v128_t test_u8x16_avgr(v128_t a, v128_t b) { return wasm_u8x16_avgr(a, b); } -// CHECK-LABEL: @test_i16x8_abs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_abs( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[ABS_I:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[ABS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -1768,9 +1925,10 @@ v128_t test_i16x8_abs(v128_t a) { return wasm_i16x8_abs(a); } -// CHECK-LABEL: 
@test_i16x8_neg( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_neg( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[TMP0]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -1779,9 +1937,10 @@ v128_t test_i16x8_neg(v128_t a) { return wasm_i16x8_neg(a); } -// CHECK-LABEL: @test_i16x8_all_true( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden zeroext i1 @test_i16x8_all_true( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK-NEXT: ret i1 [[TOBOOL_I]] @@ -1790,9 +1949,10 @@ bool test_i16x8_all_true(v128_t a) { return wasm_i16x8_all_true(a); } -// CHECK-LABEL: @test_i16x8_bitmask( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden i32 @test_i16x8_bitmask( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -1800,10 +1960,11 @@ uint32_t test_i16x8_bitmask(v128_t a) { return wasm_i16x8_bitmask(a); } -// CHECK-LABEL: @test_i16x8_shl( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: 
[[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16 +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_shl( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i16 // CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 // CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer @@ -1815,10 +1976,11 @@ v128_t test_i16x8_shl(v128_t a, uint32_t b) { return wasm_i16x8_shl(a, b); } -// CHECK-LABEL: @test_i16x8_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16 +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i16 // CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 // CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer @@ -1830,10 +1992,11 @@ v128_t test_i16x8_shr(v128_t a, uint32_t b) { return wasm_i16x8_shr(a, b); } -// CHECK-LABEL: @test_u16x8_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16 +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// 
CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B]] to i16 // CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 // CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer @@ -1845,10 +2008,11 @@ v128_t test_u16x8_shr(v128_t a, uint32_t b) { return wasm_u16x8_shr(a, b); } -// CHECK-LABEL: @test_i16x8_add( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_add( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ADD_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -1857,34 +2021,37 @@ v128_t test_i16x8_add(v128_t a, v128_t b) { return wasm_i16x8_add(a, b); } -// CHECK-LABEL: @test_i16x8_add_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_add_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: 
[[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i16x8_add_sat(v128_t a, v128_t b) { return wasm_i16x8_add_sat(a, b); } -// CHECK-LABEL: @test_u16x8_add_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_add_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u16x8_add_sat(v128_t a, v128_t b) { return wasm_u16x8_add_sat(a, b); } -// CHECK-LABEL: @test_i16x8_sub( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_sub( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] // 
CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -1893,34 +2060,37 @@ v128_t test_i16x8_sub(v128_t a, v128_t b) { return wasm_i16x8_sub(a, b); } -// CHECK-LABEL: @test_i16x8_sub_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_sub_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i16x8_sub_sat(v128_t a, v128_t b) { return wasm_i16x8_sub_sat(a, b); } -// CHECK-LABEL: @test_u16x8_sub_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_sub_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: [[ELT_SAT_I:%.*]] = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_SAT_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u16x8_sub_sat(v128_t a, v128_t b) { return wasm_u16x8_sub_sat(a, b); } -// CHECK-LABEL: @test_i16x8_mul( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_mul( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -1929,58 +2099,63 @@ v128_t test_i16x8_mul(v128_t a, v128_t b) { return wasm_i16x8_mul(a, b); } -// CHECK-LABEL: @test_i16x8_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <8 
x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MIN_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i16x8_min(v128_t a, v128_t b) { return wasm_i16x8_min(a, b); } -// CHECK-LABEL: @test_u16x8_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MIN_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u16x8_min(v128_t a, v128_t b) { return wasm_u16x8_min(a, b); } -// CHECK-LABEL: @test_i16x8_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i16x8_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to 
<8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MAX_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_i16x8_max(v128_t a, v128_t b) { return wasm_i16x8_max(a, b); } -// CHECK-LABEL: @test_u16x8_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u16x8_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> +// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ELT_MAX_I]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // v128_t test_u16x8_max(v128_t a, v128_t b) { return wasm_u16x8_max(a, b); } -// CHECK-LABEL: @test_u16x8_avgr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_avgr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: 
[[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -1989,27 +2164,30 @@ v128_t test_u16x8_avgr(v128_t a, v128_t b) { return wasm_u16x8_avgr(a, b); } -// CHECK-LABEL: @test_i32x4_abs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) +// CHECK-LABEL: define hidden noundef range(i32 0, -2147483647) <4 x i32> @test_i32x4_abs( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A]], i1 false) // CHECK-NEXT: ret <4 x i32> [[ABS_I]] // v128_t test_i32x4_abs(v128_t a) { return wasm_i32x4_abs(a); } -// CHECK-LABEL: @test_i32x4_neg( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_neg( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A]] // CHECK-NEXT: ret <4 x i32> [[SUB_I]] // v128_t test_i32x4_neg(v128_t a) { return wasm_i32x4_neg(a); } -// CHECK-LABEL: @test_i32x4_all_true( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) +// CHECK-LABEL: define hidden zeroext i1 @test_i32x4_all_true( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK-NEXT: ret i1 [[TOBOOL_I]] // @@ -2017,118 +2195,130 @@ bool test_i32x4_all_true(v128_t a) { return wasm_i32x4_all_true(a); } -// CHECK-LABEL: @test_i32x4_bitmask( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) +// CHECK-LABEL: define hidden i32 @test_i32x4_bitmask( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: ret i32 [[TMP0]] // uint32_t test_i32x4_bitmask(v128_t a) { return wasm_i32x4_bitmask(a); } -// CHECK-LABEL: @test_i32x4_shl( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B:%.*]], 31 +// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_shl( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B]], 31 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]] +// CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A]], [[SPLAT_SPLAT_I]] // CHECK-NEXT: ret <4 x i32> [[SHL_I]] // v128_t test_i32x4_shl(v128_t a, uint32_t b) { return wasm_i32x4_shl(a, b); } -// CHECK-LABEL: @test_i32x4_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B:%.*]], 31 +// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B]], 31 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]] +// CHECK-NEXT: [[SHR_I:%.*]] = 
ashr <4 x i32> [[A]], [[SPLAT_SPLAT_I]] // CHECK-NEXT: ret <4 x i32> [[SHR_I]] // v128_t test_i32x4_shr(v128_t a, uint32_t b) { return wasm_i32x4_shr(a, b); } -// CHECK-LABEL: @test_u32x4_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B:%.*]], 31 +// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[AND_I:%.*]] = and i32 [[B]], 31 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]] +// CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A]], [[SPLAT_SPLAT_I]] // CHECK-NEXT: ret <4 x i32> [[SHR_I]] // v128_t test_u32x4_shr(v128_t a, uint32_t b) { return wasm_u32x4_shr(a, b); } -// CHECK-LABEL: @test_i32x4_add( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[B:%.*]], [[A:%.*]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_add( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[B]], [[A]] // CHECK-NEXT: ret <4 x i32> [[ADD_I]] // v128_t test_i32x4_add(v128_t a, v128_t b) { return wasm_i32x4_add(a, b); } -// CHECK-LABEL: @test_i32x4_sub( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_sub( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[B]] // CHECK-NEXT: ret <4 x i32> [[SUB_I]] // v128_t test_i32x4_sub(v128_t a, v128_t b) { return 
wasm_i32x4_sub(a, b); } -// CHECK-LABEL: @test_i32x4_mul( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[A:%.*]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_mul( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[A]] // CHECK-NEXT: ret <4 x i32> [[MUL_I]] // v128_t test_i32x4_mul(v128_t a, v128_t b) { return wasm_i32x4_mul(a, b); } -// CHECK-LABEL: @test_i32x4_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) +// CHECK-NEXT: ret <4 x i32> [[ELT_MIN_I]] // v128_t test_i32x4_min(v128_t a, v128_t b) { return wasm_i32x4_min(a, b); } -// CHECK-LABEL: @test_u32x4_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_MIN_I:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) +// CHECK-NEXT: ret <4 x i32> [[ELT_MIN_I]] // v128_t test_u32x4_min(v128_t a, v128_t b) { return wasm_u32x4_min(a, b); } -// CHECK-LABEL: @test_i32x4_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) -// CHECK-NEXT: ret <4 x i32> 
[[TMP0]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) +// CHECK-NEXT: ret <4 x i32> [[ELT_MAX_I]] // v128_t test_i32x4_max(v128_t a, v128_t b) { return wasm_i32x4_max(a, b); } -// CHECK-LABEL: @test_u32x4_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_MAX_I:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) +// CHECK-NEXT: ret <4 x i32> [[ELT_MAX_I]] // v128_t test_u32x4_max(v128_t a, v128_t b) { return wasm_u32x4_max(a, b); } -// CHECK-LABEL: @test_i32x4_dot_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_dot_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // @@ -2136,9 +2326,10 @@ v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) { return wasm_i32x4_dot_i16x8(a, b); } -// CHECK-LABEL: @test_i64x2_abs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x 
i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_abs( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> // CHECK-NEXT: [[ABS_I:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[ABS_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -2147,9 +2338,10 @@ v128_t test_i64x2_abs(v128_t a) { return wasm_i64x2_abs(a); } -// CHECK-LABEL: @test_i64x2_neg( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_neg( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[TMP0]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -2158,9 +2350,10 @@ v128_t test_i64x2_neg(v128_t a) { return wasm_i64x2_neg(a); } -// CHECK-LABEL: @test_i64x2_all_true( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden zeroext i1 @test_i64x2_all_true( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK-NEXT: ret i1 [[TOBOOL_I]] @@ -2169,9 +2362,10 @@ bool test_i64x2_all_true(v128_t a) { return wasm_i64x2_all_true(a); } -// CHECK-LABEL: @test_i64x2_bitmask( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden i32 
@test_i64x2_bitmask( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) // CHECK-NEXT: ret i32 [[TMP1]] // @@ -2179,10 +2373,11 @@ uint32_t test_i64x2_bitmask(v128_t a) { return wasm_i64x2_bitmask(a); } -// CHECK-LABEL: @test_i64x2_shl( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63 +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_shl( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], 63 // CHECK-NEXT: [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -2194,10 +2389,11 @@ v128_t test_i64x2_shl(v128_t a, uint32_t b) { return wasm_i64x2_shl(a, b); } -// CHECK-LABEL: @test_i64x2_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63 +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], 63 // CHECK-NEXT: [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x 
i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -2209,10 +2405,11 @@ v128_t test_i64x2_shr(v128_t a, uint32_t b) { return wasm_i64x2_shr(a, b); } -// CHECK-LABEL: @test_u64x2_shr( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63 +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_shr( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], 63 // CHECK-NEXT: [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -2224,10 +2421,11 @@ v128_t test_u64x2_shr(v128_t a, uint32_t b) { return wasm_u64x2_shr(a, b); } -// CHECK-LABEL: @test_i64x2_add( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_add( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[ADD_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2236,10 +2434,11 @@ v128_t test_i64x2_add(v128_t a, v128_t b) { return wasm_i64x2_add(a, b); } -// CHECK-LABEL: @test_i64x2_sub( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_sub( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2248,10 +2447,11 @@ v128_t test_i64x2_sub(v128_t a, v128_t b) { return wasm_i64x2_sub(a, b); } -// CHECK-LABEL: @test_i64x2_mul( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i64x2_mul( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i64> [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2260,9 +2460,10 @@ v128_t test_i64x2_mul(v128_t a, v128_t b) { return wasm_i64x2_mul(a, b); } -// CHECK-LABEL: @test_f32x4_abs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_abs( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x 
float> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2271,9 +2472,10 @@ v128_t test_f32x4_abs(v128_t a) { return wasm_f32x4_abs(a); } -// CHECK-LABEL: @test_f32x4_neg( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_neg( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[TMP0]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -2282,9 +2484,10 @@ v128_t test_f32x4_neg(v128_t a) { return wasm_f32x4_neg(a); } -// CHECK-LABEL: @test_f32x4_sqrt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_sqrt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2293,9 +2496,10 @@ v128_t test_f32x4_sqrt(v128_t a) { return wasm_f32x4_sqrt(a); } -// CHECK-LABEL: @test_f32x4_ceil( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_ceil( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32> // CHECK-NEXT: 
ret <4 x i32> [[TMP2]] @@ -2304,9 +2508,10 @@ v128_t test_f32x4_ceil(v128_t a) { return wasm_f32x4_ceil(a); } -// CHECK-LABEL: @test_f32x4_floor( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_floor( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2315,9 +2520,10 @@ v128_t test_f32x4_floor(v128_t a) { return wasm_f32x4_floor(a); } -// CHECK-LABEL: @test_f32x4_trunc( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_trunc( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2326,9 +2532,10 @@ v128_t test_f32x4_trunc(v128_t a) { return wasm_f32x4_trunc(a); } -// CHECK-LABEL: @test_f32x4_nearest( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_nearest( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32> 
// CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2337,10 +2544,11 @@ v128_t test_f32x4_nearest(v128_t a) { return wasm_f32x4_nearest(a); } -// CHECK-LABEL: @test_f32x4_add( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_add( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ADD_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2349,10 +2557,11 @@ v128_t test_f32x4_add(v128_t a, v128_t b) { return wasm_f32x4_add(a, b); } -// CHECK-LABEL: @test_f32x4_sub( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_sub( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2361,10 +2570,11 @@ v128_t test_f32x4_sub(v128_t a, v128_t b) { return wasm_f32x4_sub(a, b); } -// CHECK-LABEL: @test_f32x4_mul( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// 
CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_mul( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[MUL_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2373,10 +2583,11 @@ v128_t test_f32x4_mul(v128_t a, v128_t b) { return wasm_f32x4_mul(a, b); } -// CHECK-LABEL: @test_f32x4_div( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_div( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[DIV_I:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[DIV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2385,10 +2596,11 @@ v128_t test_f32x4_div(v128_t a, v128_t b) { return wasm_f32x4_div(a, b); } -// CHECK-LABEL: @test_f32x4_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2397,10 +2609,11 @@ v128_t test_f32x4_min(v128_t a, v128_t b) { return wasm_f32x4_min(a, b); } -// CHECK-LABEL: @test_f32x4_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2409,10 +2622,11 @@ v128_t test_f32x4_max(v128_t a, v128_t b) { return wasm_f32x4_max(a, b); } -// CHECK-LABEL: @test_f32x4_pmin( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_pmin( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2421,10 +2635,11 @@ v128_t 
test_f32x4_pmin(v128_t a, v128_t b) { return wasm_f32x4_pmin(a, b); } -// CHECK-LABEL: @test_f32x4_pmax( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> +// CHECK-LABEL: define hidden <4 x i32> @test_f32x4_pmax( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <4 x float> // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2433,9 +2648,10 @@ v128_t test_f32x4_pmax(v128_t a, v128_t b) { return wasm_f32x4_pmax(a, b); } -// CHECK-LABEL: @test_f64x2_abs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_abs( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2444,9 +2660,10 @@ v128_t test_f64x2_abs(v128_t a) { return wasm_f64x2_abs(a); } -// CHECK-LABEL: @test_f64x2_neg( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_neg( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: 
[[FNEG_I:%.*]] = fneg <2 x double> [[TMP0]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -2455,9 +2672,10 @@ v128_t test_f64x2_neg(v128_t a) { return wasm_f64x2_neg(a); } -// CHECK-LABEL: @test_f64x2_sqrt( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_sqrt( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2466,9 +2684,10 @@ v128_t test_f64x2_sqrt(v128_t a) { return wasm_f64x2_sqrt(a); } -// CHECK-LABEL: @test_f64x2_ceil( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_ceil( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2477,9 +2696,10 @@ v128_t test_f64x2_ceil(v128_t a) { return wasm_f64x2_ceil(a); } -// CHECK-LABEL: @test_f64x2_floor( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_floor( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call 
<2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2488,9 +2708,10 @@ v128_t test_f64x2_floor(v128_t a) { return wasm_f64x2_floor(a); } -// CHECK-LABEL: @test_f64x2_trunc( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_trunc( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2499,9 +2720,10 @@ v128_t test_f64x2_trunc(v128_t a) { return wasm_f64x2_trunc(a); } -// CHECK-LABEL: @test_f64x2_nearest( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_nearest( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2510,10 +2732,11 @@ v128_t test_f64x2_nearest(v128_t a) { return wasm_f64x2_nearest(a); } -// CHECK-LABEL: @test_f64x2_add( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_add( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[ADD_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2522,10 +2745,11 @@ v128_t test_f64x2_add(v128_t a, v128_t b) { return wasm_f64x2_add(a, b); } -// CHECK-LABEL: @test_f64x2_sub( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_sub( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[SUB_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2534,10 +2758,11 @@ v128_t test_f64x2_sub(v128_t a, v128_t b) { return wasm_f64x2_sub(a, b); } -// CHECK-LABEL: @test_f64x2_mul( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_mul( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[MUL_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> 
[[TMP2]] @@ -2546,10 +2771,11 @@ v128_t test_f64x2_mul(v128_t a, v128_t b) { return wasm_f64x2_mul(a, b); } -// CHECK-LABEL: @test_f64x2_div( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_div( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[DIV_I:%.*]] = fdiv <2 x double> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[DIV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2558,10 +2784,11 @@ v128_t test_f64x2_div(v128_t a, v128_t b) { return wasm_f64x2_div(a, b); } -// CHECK-LABEL: @test_f64x2_min( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_min( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2570,10 +2797,11 @@ v128_t test_f64x2_min(v128_t a, v128_t b) { return wasm_f64x2_min(a, b); } -// CHECK-LABEL: @test_f64x2_max( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> 
[[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f64x2_max( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2582,10 +2810,11 @@ v128_t test_f64x2_max(v128_t a, v128_t b) { return wasm_f64x2_max(a, b); } -// CHECK-LABEL: @test_f64x2_pmin( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_pmin( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2594,10 +2823,11 @@ v128_t test_f64x2_pmin(v128_t a, v128_t b) { return wasm_f64x2_pmin(a, b); } -// CHECK-LABEL: @test_f64x2_pmax( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double> +// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_pmax( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <2 x double> // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2606,9 +2836,10 @@ v128_t test_f64x2_pmax(v128_t a, v128_t b) { return wasm_f64x2_pmax(a, b); } -// CHECK-LABEL: @test_i32x4_trunc_sat_f32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_trunc_sat_f32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -2616,9 +2847,10 @@ v128_t test_i32x4_trunc_sat_f32x4(v128_t a) { return wasm_i32x4_trunc_sat_f32x4(a); } -// CHECK-LABEL: @test_u32x4_trunc_sat_f32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_trunc_sat_f32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -2626,9 +2858,10 @@ v128_t test_u32x4_trunc_sat_f32x4(v128_t a) { return wasm_u32x4_trunc_sat_f32x4(a); } -// CHECK-LABEL: @test_f32x4_convert_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CONV_I:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_convert_i32x4( +// CHECK-SAME: <4 x i32> 
noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV_I:%.*]] = sitofp <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -2636,9 +2869,10 @@ v128_t test_f32x4_convert_i32x4(v128_t a) { return wasm_f32x4_convert_i32x4(a); } -// CHECK-LABEL: @test_f32x4_convert_u32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CONV_I:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_convert_u32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV_I:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -2646,9 +2880,10 @@ v128_t test_f32x4_convert_u32x4(v128_t a) { return wasm_f32x4_convert_u32x4(a); } -// CHECK-LABEL: @test_f64x2_convert_low_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_convert_low_i32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = sitofp <2 x i32> [[VECINIT2_I]] to <2 x double> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -2657,9 +2892,10 @@ v128_t test_f64x2_convert_low_i32x4(v128_t a) { return wasm_f64x2_convert_low_i32x4(a); } -// CHECK-LABEL: @test_f64x2_convert_low_u32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_convert_low_u32x4( +// CHECK-SAME: 
<4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = uitofp <2 x i32> [[VECINIT2_I]] to <2 x double> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -2668,9 +2904,10 @@ v128_t test_f64x2_convert_low_u32x4(v128_t a) { return wasm_f64x2_convert_low_u32x4(a); } -// CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_i32x4_trunc_sat_f64x2_zero( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2679,9 +2916,10 @@ v128_t test_i32x4_trunc_sat_f64x2_zero(v128_t a) { return wasm_i32x4_trunc_sat_f64x2_zero(a); } -// CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_u32x4_trunc_sat_f64x2_zero( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2690,9 +2928,10 @@ v128_t test_u32x4_trunc_sat_f64x2_zero(v128_t a) { return 
wasm_u32x4_trunc_sat_f64x2_zero(a); } -// CHECK-LABEL: @test_f32x4_demote_f64x2_zero( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double> +// CHECK-LABEL: define hidden noundef <4 x i32> @test_f32x4_demote_f64x2_zero( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x double> // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> zeroinitializer, <4 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = fptrunc <4 x double> [[SHUFFLE_I]] to <4 x float> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32> @@ -2702,9 +2941,10 @@ v128_t test_f32x4_demote_f64x2_zero(v128_t a) { return wasm_f32x4_demote_f64x2_zero(a); } -// CHECK-LABEL: @test_f64x2_promote_low_f32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-LABEL: define hidden <4 x i32> @test_f64x2_promote_low_f32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32> @@ -2714,10 +2954,11 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) { return wasm_f64x2_promote_low_f32x4(a); } -// CHECK-LABEL: @test_i8x16_shuffle( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_shuffle( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2726,10 +2967,11 @@ v128_t test_i8x16_shuffle(v128_t a, v128_t b) { return wasm_i8x16_shuffle(a, b, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } -// CHECK-LABEL: @test_i16x8_shuffle( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_shuffle( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2738,10 +2980,11 @@ v128_t test_i16x8_shuffle(v128_t a, v128_t b) { return wasm_i16x8_shuffle(a, b, 7, 6, 5, 4, 3, 2, 1, 0); } -// CHECK-LABEL: @test_i32x4_shuffle( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_shuffle( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr 
#[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2750,10 +2993,11 @@ v128_t test_i32x4_shuffle(v128_t a, v128_t b) { return wasm_i32x4_shuffle(a, b, 3, 2, 1, 0); } -// CHECK-LABEL: @test_i64x2_shuffle( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_shuffle( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2762,10 +3006,11 @@ v128_t test_i64x2_shuffle(v128_t a, v128_t b) { return wasm_i64x2_shuffle(a, b, 1, 0); } -// CHECK-LABEL: @test_i8x16_swizzle( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_swizzle( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2774,10 +3019,11 @@ v128_t test_i8x16_swizzle(v128_t a, v128_t b) { return wasm_i8x16_swizzle(a, b); } -// CHECK-LABEL: @test_i8x16_narrow_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_i8x16_narrow_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2786,10 +3032,11 @@ v128_t test_i8x16_narrow_i16x8(v128_t a, v128_t b) { return wasm_i8x16_narrow_i16x8(a, b); } -// CHECK-LABEL: @test_u8x16_narrow_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_u8x16_narrow_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -2798,9 +3045,10 @@ v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) { return wasm_u8x16_narrow_i16x8(a, b); } -// CHECK-LABEL: @test_i16x8_narrow_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_narrow_i32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -2808,9 +3056,10 @@ v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) { return wasm_i16x8_narrow_i32x4(a, b); } -// CHECK-LABEL: @test_u16x8_narrow_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_narrow_i32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -2818,9 +3067,10 @@ v128_t test_u16x8_narrow_i32x4(v128_t a, v128_t b) { return wasm_u16x8_narrow_i32x4(a, b); } -// CHECK-LABEL: @test_i16x8_extend_low_i8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> 
@test_i16x8_extend_low_i8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32> @@ -2830,9 +3080,10 @@ v128_t test_i16x8_extend_low_i8x16(v128_t a) { return wasm_i16x8_extend_low_i8x16(a); } -// CHECK-LABEL: @test_i16x8_extend_high_i8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extend_high_i8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32> @@ -2842,9 +3093,10 @@ v128_t test_i16x8_extend_high_i8x16(v128_t a) { return wasm_i16x8_extend_high_i8x16(a); } -// CHECK-LABEL: @test_u16x8_extend_low_u8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extend_low_u8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32> @@ -2854,9 +3106,10 @@ v128_t 
test_u16x8_extend_low_u8x16(v128_t a) { return wasm_u16x8_extend_low_u8x16(a); } -// CHECK-LABEL: @test_u16x8_extend_high_u8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extend_high_u8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32> @@ -2866,9 +3119,10 @@ v128_t test_u16x8_extend_high_u8x16(v128_t a) { return wasm_u16x8_extend_high_u8x16(a); } -// CHECK-LABEL: @test_i32x4_extend_low_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 -32768, 32768) <4 x i32> @test_i32x4_extend_low_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] @@ -2877,9 +3131,10 @@ v128_t test_i32x4_extend_low_i16x8(v128_t a) { return wasm_i32x4_extend_low_i16x8(a); } -// CHECK-LABEL: @test_i32x4_extend_high_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 -32768, 32768) <4 x i32> @test_i32x4_extend_high_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: 
[[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] @@ -2888,9 +3143,10 @@ v128_t test_i32x4_extend_high_i16x8(v128_t a) { return wasm_i32x4_extend_high_i16x8(a); } -// CHECK-LABEL: @test_u32x4_extend_low_u16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 0, 65536) <4 x i32> @test_u32x4_extend_low_u16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] @@ -2899,9 +3155,10 @@ v128_t test_u32x4_extend_low_u16x8(v128_t a) { return wasm_u32x4_extend_low_u16x8(a); } -// CHECK-LABEL: @test_u32x4_extend_high_u16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 0, 65536) <4 x i32> @test_u32x4_extend_high_u16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[CONV_I]] @@ -2910,9 +3167,10 @@ v128_t test_u32x4_extend_high_u16x8(v128_t a) { return wasm_u32x4_extend_high_u16x8(a); } -// CHECK-LABEL: @test_i64x2_extend_low_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> 
@test_i64x2_extend_low_i32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -2921,9 +3179,10 @@ v128_t test_i64x2_extend_low_i32x4(v128_t a) { return wasm_i64x2_extend_low_i32x4(a); } -// CHECK-LABEL: @test_i64x2_extend_high_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_extend_high_i32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -2932,9 +3191,10 @@ v128_t test_i64x2_extend_high_i32x4(v128_t a) { return wasm_i64x2_extend_high_i32x4(a); } -// CHECK-LABEL: @test_u64x2_extend_low_u32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extend_low_u32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -2943,9 +3203,10 @@ v128_t test_u64x2_extend_low_u32x4(v128_t a) { return 
wasm_u64x2_extend_low_u32x4(a); } -// CHECK-LABEL: @test_u64x2_extend_high_u32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extend_high_u32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64> // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP0]] @@ -2954,9 +3215,10 @@ v128_t test_u64x2_extend_high_u32x4(v128_t a) { return wasm_u64x2_extend_high_u32x4(a); } -// CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extadd_pairwise_i8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2965,9 +3227,10 @@ v128_t test_i16x8_extadd_pairwise_i8x16(v128_t a) { return wasm_i16x8_extadd_pairwise_i8x16(a); } -// CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extadd_pairwise_u8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> 
@llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] @@ -2976,9 +3239,10 @@ v128_t test_u16x8_extadd_pairwise_u8x16(v128_t a) { return wasm_u16x8_extadd_pairwise_u8x16(a); } -// CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_i32x4_extadd_pairwise_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -2986,9 +3250,10 @@ v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) { return wasm_i32x4_extadd_pairwise_i16x8(a); } -// CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_u32x4_extadd_pairwise_u16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -2996,12 +3261,13 @@ v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) { return wasm_u32x4_extadd_pairwise_u16x8(a); } -// CHECK-LABEL: @test_i16x8_extmul_low_i8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extmul_low_i8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]] @@ -3012,12 +3278,13 @@ v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) { return wasm_i16x8_extmul_low_i8x16(a, b); } -// CHECK-LABEL: @test_i16x8_extmul_high_i8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_extmul_high_i8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]] @@ -3028,12 +3295,13 @@ v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) { return wasm_i16x8_extmul_high_i8x16(a, b); } -// CHECK-LABEL: @test_u16x8_extmul_low_u8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x 
i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extmul_low_u8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]] @@ -3044,12 +3312,13 @@ v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) { return wasm_u16x8_extmul_low_u8x16(a, b); } -// CHECK-LABEL: @test_u16x8_extmul_high_u8x16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> +// CHECK-LABEL: define hidden <4 x i32> @test_u16x8_extmul_high_u8x16( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16> // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I_I]], 
[[CONV_I3_I]] @@ -3060,12 +3329,13 @@ v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) { return wasm_u16x8_extmul_high_u8x16(a, b); } -// CHECK-LABEL: @test_i32x4_extmul_low_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 -1073709056, 1073741825) <4 x i32> @test_i32x4_extmul_low_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]] @@ -3075,12 +3345,13 @@ v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) { return wasm_i32x4_extmul_low_i16x8(a, b); } -// CHECK-LABEL: @test_i32x4_extmul_high_i16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 -1073709056, 1073741825) <4 x i32> @test_i32x4_extmul_high_i16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x 
i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]] @@ -3090,12 +3361,13 @@ v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) { return wasm_i32x4_extmul_high_i16x8(a, b); } -// CHECK-LABEL: @test_u32x4_extmul_low_u16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 0, -131070) <4 x i32> @test_u32x4_extmul_low_u16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]] @@ -3105,12 +3377,13 @@ v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) { return wasm_u32x4_extmul_low_u16x8(a, b); } -// CHECK-LABEL: @test_u32x4_extmul_high_u16x8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden range(i32 0, -131070) <4 x i32> @test_u32x4_extmul_high_u16x8( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] 
= bitcast <4 x i32> [[A]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32> // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]] @@ -3120,11 +3393,12 @@ v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) { return wasm_u32x4_extmul_high_u16x8(a, b); } -// CHECK-LABEL: @test_i64x2_extmul_low_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_extmul_low_i32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> @@ -3134,11 +3408,12 @@ v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) { return wasm_i64x2_extmul_low_i32x4(a, b); } -// CHECK-LABEL: @test_i64x2_extmul_high_i32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> 
[[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_i64x2_extmul_high_i32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> @@ -3148,11 +3423,12 @@ v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) { return wasm_i64x2_extmul_high_i32x4(a, b); } -// CHECK-LABEL: @test_u64x2_extmul_low_u32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extmul_low_u32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> @@ -3162,11 +3438,12 @@ v128_t 
test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) { return wasm_u64x2_extmul_low_u32x4(a, b); } -// CHECK-LABEL: @test_u64x2_extmul_high_u32x4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-LABEL: define hidden <4 x i32> @test_u64x2_extmul_high_u32x4( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64> -// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> +// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64> // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32> @@ -3176,10 +3453,11 @@ v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) { return wasm_u64x2_extmul_high_u32x4(a, b); } -// CHECK-LABEL: @test_i16x8_q15mulr_sat( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> +// CHECK-LABEL: define hidden <4 x i32> @test_i16x8_q15mulr_sat( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <8 x i16> // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] @@ -3187,3 
+3465,8 @@ v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) { v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) { return wasm_i16x8_q15mulr_sat(a, b); } +//. +// CHECK: [[CHAR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/OpenMP/bug54082.c b/clang/test/OpenMP/bug54082.c index bda4bd29b9e66..ef3e7153545bf 100644 --- a/clang/test/OpenMP/bug54082.c +++ b/clang/test/OpenMP/bug54082.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6 // RUN: %clang_cc1 -fopenmp -O1 -x c -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK typedef enum omp_allocator_handle_t { @@ -63,47 +63,47 @@ void foo() { (void)x; } } -// CHECK-LABEL: define {{[^@]+}}@foo -// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define void @foo( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_TRAITS:%.*]] = alloca [1 x %struct.omp_alloctrait_t], align 16 // CHECK-NEXT: [[X_ALLOC:%.*]] = alloca i64, align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X_TRAITS]]) #[[ATTR5:[0-9]+]] // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) [[X_TRAITS]], ptr noundef nonnull align 16 dereferenceable(16) @__const.foo.x_traits, i64 16, i1 false) // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[X_ALLOC]]) #[[ATTR5]] // CHECK-NEXT: [[CALL:%.*]] = call i64 @omp_init_allocator(i64 noundef 0, i32 noundef 1, ptr noundef nonnull [[X_TRAITS]]) #[[ATTR5]] -// CHECK-NEXT: store i64 
[[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3:![0-9]+]] +// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3:![0-9]+]] // CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB2:[0-9]+]], i32 1, ptr nonnull @foo.omp_outlined, ptr nonnull [[X_ALLOC]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X_ALLOC]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[X_TRAITS]]) #[[ATTR5]] // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@foo.omp_outlined -// CHECK-SAME: (ptr noalias noundef readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[X_ALLOC:%.*]]) #[[ATTR4:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define internal void @foo.omp_outlined( +// CHECK-SAME: ptr noalias noundef readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[X_ALLOC:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_LB]]) #[[ATTR5]] -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA7:![0-9]+]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_UB]]) #[[ATTR5]] -// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_STRIDE]]) #[[ATTR5]] -// 
CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA7]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]] -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3]] // CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP1]] to ptr // CHECK-NEXT: [[DOTX__VOID_ADDR:%.*]] = tail call ptr @__kmpc_alloc(i32 [[TMP0]], i64 8, ptr [[CONV]]) // CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr nonnull [[DOTOMP_IS_LAST]], ptr nonnull [[DOTOMP_LB]], ptr nonnull [[DOTOMP_UB]], ptr nonnull [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]] // CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 1023) -// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA7]] // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[LONG_TBAA3]] // CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP3]] to ptr // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr [[CONV5]]) // CHECK-NEXT: 
call void @llvm.lifetime.end.p0(ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]] @@ -112,3 +112,11 @@ void foo() { // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[DOTOMP_LB]]) #[[ATTR5]] // CHECK-NEXT: ret void // +//. +// CHECK: [[LONG_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[INT_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"int", [[META5]], i64 0} +//. diff --git a/clang/test/OpenMP/bug56913.c b/clang/test/OpenMP/bug56913.c index fad9e17ac4dd8..fa5e46d30ae85 100644 --- a/clang/test/OpenMP/bug56913.c +++ b/clang/test/OpenMP/bug56913.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6 // RUN: %clang_cc1 -fopenmp-simd -O1 -x c -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK int j; @@ -12,21 +12,31 @@ void loop(int n) { u = &j; } } -// CHECK-LABEL: define {{[^@]+}}@loop -// CHECK-SAME: (i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define void @loop( +// CHECK-SAME: i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0 -// CHECK-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] -// CHECK: simd.if.then: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: br i1 [[CMP]], label %[[SIMD_IF_THEN:.*]], label %[[SIMD_IF_END:.*]] +// CHECK: [[SIMD_IF_THEN]]: +// 
CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[INT_TBAA2:![0-9]+]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[J]]) #[[ATTR2:[0-9]+]] -// CHECK-NEXT: store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK-NEXT: store ptr [[J]], ptr @u, align 8, !tbaa [[INTPTR_TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP9:![0-9]+]] // CHECK-NEXT: [[INC_LE:%.*]] = add i32 [[TMP0]], [[N]] -// CHECK-NEXT: store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[INT_TBAA2]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[J]]) #[[ATTR2]] -// CHECK-NEXT: br label [[SIMD_IF_END]] -// CHECK: simd.if.end: +// CHECK-NEXT: br label %[[SIMD_IF_END]] +// CHECK: [[SIMD_IF_END]]: // CHECK-NEXT: ret void // +//. +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[INTPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"p1 int", [[META8:![0-9]+]], i64 0} +// CHECK: [[META8]] = !{!"any pointer", [[META4]], i64 0} +// CHECK: [[ACC_GRP9]] = distinct !{} +//. 
diff --git a/clang/test/OpenMP/bug57757.cpp b/clang/test/OpenMP/bug57757.cpp index caf53a5b62c1c..5b61e143a0548 100644 --- a/clang/test/OpenMP/bug57757.cpp +++ b/clang/test/OpenMP/bug57757.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6 // RUN: %clang_cc1 -fopenmp -O1 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK template @@ -14,42 +14,42 @@ void foo() { float b; run_task(bar, a, b); } -// CHECK-LABEL: define {{[^@]+}}@_Z3foov -// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define void @_Z3foov( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1:[0-9]+]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 0, i64 56, i64 1, ptr nonnull @.omp_task_entry.) // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 -// CHECK-NEXT: store ptr @_Z3barif, ptr [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]] +// CHECK-NEXT: store ptr @_Z3barif, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 -// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 8, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[TMP3]], align 8, !tbaa [[INT_TBAA12:![0-9]+]] // CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry. 
-// CHECK-SAME: (i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define internal noundef i32 @.omp_task_entry.( +// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA16:![0-9]+]], !alias.scope [[META13]], !noalias [[META17:![0-9]+]] -// CHECK-NEXT: switch i32 [[TMP3]], label [[DOTOMP_OUTLINED__EXIT:%.*]] [ -// CHECK-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] -// CHECK-NEXT: i32 1, label [[DOTUNTIED_NEXT__I:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA16:![0-9]+]], !alias.scope [[META13]], !noalias [[META17:![0-9]+]] +// CHECK-NEXT: switch i32 [[TMP3]], [[DOTOMP_OUTLINED__EXIT:label %.*]] [ +// CHECK-NEXT: i32 0, [[DOTUNTIED_JMP__I:label %.*]] +// CHECK-NEXT: i32 1, [[DOTUNTIED_NEXT__I:label %.*]] // CHECK-NEXT: ] -// CHECK: .untied.jmp..i: -// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope [[META13]], !noalias [[META17]] +// CHECK: [[_UNTIED_JMP__I:.*:]] +// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA16]], !alias.scope [[META13]], !noalias [[META17]] // CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP1]]), !noalias [[META13]] -// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] -// CHECK: .untied.next..i: +// CHECK-NEXT: br [[DOTOMP_OUTLINED__EXIT]] +// CHECK: [[_UNTIED_NEXT__I:.*:]] // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], 
i64 48 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA19:![0-9]+]], !noalias [[META13]] -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 8, !tbaa [[TBAA16]], !noalias [[META13]] -// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA20:![0-9]+]], !noalias [[META13]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA19:![0-9]+]], !noalias [[META13]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 8, !tbaa [[INT_TBAA16]], !noalias [[META13]] +// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[FLOAT_TBAA20:![0-9]+]], !noalias [[META13]] // CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias [[META13]] -// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] -// CHECK: .omp_outlined..exit: +// CHECK-NEXT: br [[DOTOMP_OUTLINED__EXIT]] +// CHECK: [[_OMP_OUTLINED__EXIT:.*:]] // CHECK-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index 20e344f0a34a0..46c87eb31969d 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 6 // RUN: %clang_cc1 -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify 
-fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 // RUN: %clang_cc1 -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc @@ -30,30 +30,30 @@ void test() { complex_reduction(); } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 -// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define weak_odr protected ptx_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16( +// CHECK1-SAME: ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[TBAA10:![0-9]+]] +// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA6:![0-9]+]] // CHECK1-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1: user_code.entry: +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] +// CHECK1: [[USER_CODE_ENTRY]]: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA15:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA10:![0-9]+]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void -// CHECK1: worker.exit: +// CHECK1: [[WORKER_EXIT]]: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined( +// CHECK1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -66,82 +66,82 @@ void test() { // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 
4 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12:![0-9]+]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// CHECK1: [[OMP_INNER_FOR_COND]]: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] +// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] +// CHECK1: [[OMP_INNER_FOR_BODY]]: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA19:![0-9]+]] +// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[FLOAT_TBAA14:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]] -// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2]], align 4, !tbaa [[TBAA19]] +// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2]], align 4, !tbaa [[FLOAT_TBAA14]] // CHECK1-NEXT: call void 
@_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA21:![0-9]+]] +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[ANYPTR_TBAA16:![0-9]+]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA21]] +// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[ANYPTR_TBAA16]] // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA21]] +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, 
!tbaa [[ANYPTR_TBAA16]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// CHECK1: [[OMP_BODY_CONTINUE]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// CHECK1: [[OMP_INNER_FOR_INC]]: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] +// CHECK1: [[OMP_INNER_FOR_END]]: +// CHECK1-NEXT: br label %[[OMP_LOOP_EXIT:.*]] +// CHECK1: [[OMP_LOOP_EXIT]]: // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[IB]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] @@ -155,15 +155,15 @@ void test() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ -// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr 
#[[ATTR5:[0-9]+]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIfEC1ERKfS2_( +// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5:[0-9]+]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23:![0-9]+]] -// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25:![0-9]+]] -// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18:![0-9]+]] +// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20:![0-9]+]] +// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8 @@ -171,9 +171,9 @@ void test() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined( +// CHECK1-SAME: ptr noalias 
[[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8 @@ -197,155 +197,155 @@ void test() { // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA23]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA23]] +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22:![0-9]+]], !align [[META23:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22]], !align [[META23]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]], !nonnull [[META22]], !align [[META23]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: 
[[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: +// CHECK1-NEXT: br i1 [[CMP]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]] +// CHECK1: [[OMP_PRECOND_THEN]]: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA19]] +// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[FLOAT_TBAA14]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP6]]) #[[ATTR4]] -// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP6]], align 4, !tbaa [[TBAA19]] +// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP6]], align 4, !tbaa [[FLOAT_TBAA14]] // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP6]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr 
@[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND:.*]] +// CHECK1: [[OMP_DISPATCH_COND]]: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP8]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE]] ], [ [[TMP16]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] -// CHECK1: omp.dispatch.cleanup: -// CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] -// CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP10]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_CLEANUP:.*]] +// CHECK1: [[OMP_DISPATCH_CLEANUP]]: +// CHECK1-NEXT: br label %[[OMP_DISPATCH_END:.*]] +// CHECK1: [[OMP_DISPATCH_BODY]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// CHECK1: [[OMP_INNER_FOR_COND]]: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP12]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] +// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] +// CHECK1: [[OMP_INNER_FOR_BODY]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to float -// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[TBAA19]] +// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[FLOAT_TBAA14]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP16]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to float -// CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[TBAA19]] +// 
CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[FLOAT_TBAA14]] // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP16]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP15]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP14]]) #[[ATTR4]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// CHECK1: [[OMP_BODY_CONTINUE]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// CHECK1: [[OMP_INNER_FOR_INC]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] +// CHECK1: [[OMP_INNER_FOR_END]]: +// CHECK1-NEXT: br label 
%[[OMP_DISPATCH_INC:.*]] +// CHECK1: [[OMP_DISPATCH_INC]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK1: omp.dispatch.end: +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND]] +// CHECK1: [[OMP_DISPATCH_END]]: // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) // CHECK1-NEXT: 
[[TMP35:%.*]] = icmp eq i32 [[TMP34]], 1 -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK1: .omp.reduction.then: +// CHECK1-NEXT: br i1 [[TMP35]], [[DOTOMP_REDUCTION_THEN:label %.*]], [[DOTOMP_REDUCTION_DONE:label %.*]] +// CHECK1: [[_OMP_REDUCTION_THEN:.*:]] // CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP2]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]] -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK1: .omp.reduction.done: +// CHECK1-NEXT: br [[DOTOMP_REDUCTION_DONE]] +// CHECK1: [[_OMP_REDUCTION_DONE:.*:]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I7]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: +// CHECK1-NEXT: br label %[[OMP_PRECOND_END]] +// CHECK1: [[OMP_PRECOND_END]]: // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]] @@ -353,32 +353,32 @@ void test() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E -// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden nonnull align 4 
dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E( +// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]] -// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]] +// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]], !nonnull [[META22]], !align [[META23]] // CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(ptr nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA27:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] -// CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[TBAA27]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]] +// CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]], !nonnull [[META22]], !align [[META23]] // CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(ptr nonnull align 4 
dereferenceable(8) [[TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA29:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] -// CHECK1-NEXT: store float [[ADD3]], ptr [[__IM_]], align 4, !tbaa [[TBAA29]] +// CHECK1-NEXT: store float [[ADD3]], ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26]] // CHECK1-NEXT: ret ptr [[THIS1]] // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @_omp_reduction_shuffle_and_reduce_func( +// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 @@ -417,33 +417,33 @@ void test() { // CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] // CHECK1-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] // CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] -// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: +// CHECK1-NEXT: br i1 [[TMP29]], label %[[THEN:.*]], label %[[ELSE:.*]] +// CHECK1: [[THEN]]: // CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// 
CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: +// CHECK1-NEXT: br label %[[IFCONT:.*]] +// CHECK1: [[ELSE]]: +// CHECK1-NEXT: br label %[[IFCONT]] +// CHECK1: [[IFCONT]]: // CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 // CHECK1-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] // CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK1: then4: +// CHECK1-NEXT: br i1 [[TMP32]], label %[[THEN4:.*]], label %[[ELSE5:.*]] +// CHECK1: [[THEN4]]: // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP36]], ptr align 8 [[TMP34]], i64 8, i1 false) -// CHECK1-NEXT: br label [[IFCONT6:%.*]] -// CHECK1: else5: -// CHECK1-NEXT: br label [[IFCONT6]] -// CHECK1: ifcont6: +// CHECK1-NEXT: br label %[[IFCONT6:.*]] +// CHECK1: [[ELSE5]]: +// CHECK1-NEXT: br label %[[IFCONT6]] +// CHECK1: [[IFCONT6]]: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @_omp_reduction_inter_warp_copy_func( +// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 @@ -456,96 +456,96 @@ void test() { // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // 
CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: br label [[PRECOND:%.*]] -// CHECK1: precond: +// CHECK1-NEXT: br label %[[PRECOND:.*]] +// CHECK1: [[PRECOND]]: // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2 -// CHECK1-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK1: body: +// CHECK1-NEXT: br i1 [[TMP7]], label %[[BODY:.*]], label %[[EXIT:.*]] +// CHECK1: [[BODY]]: // CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: +// CHECK1-NEXT: br i1 [[WARP_MASTER]], label %[[THEN:.*]], label %[[ELSE:.*]] +// CHECK1: [[THEN]]: // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: +// CHECK1-NEXT: br label %[[IFCONT:.*]] +// CHECK1: [[ELSE]]: +// CHECK1-NEXT: br label %[[IFCONT]] +// CHECK1: [[IFCONT]]: // CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 
// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]] -// CHECK1: then3: +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label %[[THEN3:.*]], label %[[ELSE4:.*]] +// CHECK1: [[THEN3]]: // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]] // CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 // CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 -// CHECK1-NEXT: br label [[IFCONT5:%.*]] -// CHECK1: else4: -// CHECK1-NEXT: br label [[IFCONT5]] -// CHECK1: ifcont5: +// CHECK1-NEXT: br label %[[IFCONT5:.*]] +// CHECK1: [[ELSE4]]: +// CHECK1-NEXT: br label %[[IFCONT5]] +// CHECK1: [[IFCONT5]]: // CHECK1-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: br label [[PRECOND]] -// CHECK1: exit: +// CHECK1-NEXT: br label %[[PRECOND]] +// CHECK1: [[EXIT]]: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper( +// CHECK1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA30:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[SHORT_TBAA27:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA32:![0-9]+]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA6]] // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA32]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA6]] // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA34:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 -// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define weak_odr protected ptx_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16( +// CHECK1-SAME: ptr noalias 
[[DYN_PTR:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[ANYPTR_TBAA6]] // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK1: user_code.entry: +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] +// CHECK1: [[USER_CODE_ENTRY]]: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void -// CHECK1: worker.exit: +// CHECK1: [[WORKER_EXIT]]: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined( +// CHECK1-SAME: ptr noalias 
[[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -558,82 +558,82 @@ void test() { // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]] +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 16) // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void 
@llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: 
[[COND_TRUE]]: +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// CHECK1: [[OMP_INNER_FOR_COND]]: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] +// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] +// CHECK1: [[OMP_INNER_FOR_BODY]]: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa 
[[TBAA36:![0-9]+]] +// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[DOUBLE_TBAA29:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]] -// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2]], align 8, !tbaa [[TBAA36]] +// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2]], align 8, !tbaa [[DOUBLE_TBAA29]] // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA21]] +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[ANYPTR_TBAA16]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[IEND]], ptr 
[[TMP11]], align 8, !tbaa [[TBAA21]] +// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[ANYPTR_TBAA16]] // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA21]] +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[ANYPTR_TBAA16]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// CHECK1: [[OMP_BODY_CONTINUE]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// CHECK1: [[OMP_INNER_FOR_INC]]: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] +// CHECK1: [[OMP_INNER_FOR_END]]: +// CHECK1-NEXT: br label %[[OMP_LOOP_EXIT:.*]] +// CHECK1: [[OMP_LOOP_EXIT]]: // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[IB]]) #[[ATTR4]] // CHECK1-NEXT: call 
void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] @@ -647,15 +647,15 @@ void test() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ -// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIdEC1ERKdS2_( +// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38:![0-9]+]] -// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40:![0-9]+]] -// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31:![0-9]+]] +// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33:![0-9]+]] +// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8 @@ -663,9 +663,9 @@ void test() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull 
align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined( +// CHECK1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8 @@ -689,155 +689,155 @@ void test() { // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA38]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA38]] +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa 
[[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]] +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22]], !align [[META23]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[INTPTR_TBAA12]], !nonnull [[META22]], !align [[META23]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]], !nonnull [[META22]], !align [[META35:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] +// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: +// CHECK1-NEXT: br i1 [[CMP]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]] +// CHECK1: [[OMP_PRECOND_THEN]]: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA36]] +// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[DOUBLE_TBAA29]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP6]]) #[[ATTR4]] -// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6]], align 8, !tbaa [[TBAA36]] +// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6]], align 8, !tbaa [[DOUBLE_TBAA29]] // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP6]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]] // CHECK1-NEXT: 
call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND:.*]] +// CHECK1: [[OMP_DISPATCH_COND]]: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP19:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP8]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE]] ], [ [[TMP16]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] -// CHECK1: omp.dispatch.cleanup: -// CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] -// CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP10]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_CLEANUP:.*]] +// CHECK1: [[OMP_DISPATCH_CLEANUP]]: +// CHECK1-NEXT: br label %[[OMP_DISPATCH_END:.*]] +// CHECK1: [[OMP_DISPATCH_BODY]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] +// CHECK1: [[OMP_INNER_FOR_COND]]: +// CHECK1-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br i1 [[CMP12]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]] +// CHECK1: [[OMP_INNER_FOR_COND_CLEANUP]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_END:.*]] +// CHECK1: [[OMP_INNER_FOR_BODY]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to double -// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[TBAA36]] +// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[DOUBLE_TBAA29]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr 
[[REF_TMP16]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[TBAA36]] +// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[DOUBLE_TBAA29]] // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP16]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP15]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP14]]) #[[ATTR4]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] +// CHECK1: [[OMP_BODY_CONTINUE]]: +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] +// CHECK1: [[OMP_INNER_FOR_INC]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_INNER_FOR_COND]] +// CHECK1: [[OMP_INNER_FOR_END]]: +// CHECK1-NEXT: br label %[[OMP_DISPATCH_INC:.*]] +// CHECK1: [[OMP_DISPATCH_INC]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK1: omp.dispatch.end: +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[INT_TBAA10]] +// CHECK1-NEXT: br label %[[OMP_DISPATCH_COND]] +// CHECK1: [[OMP_DISPATCH_END]]: // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2) // CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP34]], 1 -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK1: .omp.reduction.then: +// CHECK1-NEXT: br i1 [[TMP35]], [[DOTOMP_REDUCTION_THEN:label %.*]], [[DOTOMP_REDUCTION_DONE:label %.*]] +// CHECK1: [[_OMP_REDUCTION_THEN:.*:]] // CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[TMP2]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]] -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK1: .omp.reduction.done: +// CHECK1-NEXT: br [[DOTOMP_REDUCTION_DONE]] +// CHECK1: [[_OMP_REDUCTION_DONE:.*:]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I7]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]] -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: +// CHECK1-NEXT: br label %[[OMP_PRECOND_END]] +// CHECK1: [[OMP_PRECOND_END]]: // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]] @@ -845,32 +845,32 @@ 
void test() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E -// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E( +// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]] -// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]] +// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]], !nonnull [[META22]], !align [[META35]] // CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(ptr nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA42:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] -// CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[TBAA42]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]] +// CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]], !nonnull [[META22]], !align [[META35]] // CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(ptr nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA44:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[DOUBLE_TBAA38:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] -// CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[TBAA44]] +// CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[DOUBLE_TBAA38]] // CHECK1-NEXT: ret ptr [[THIS1]] // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1 -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @_omp_reduction_shuffle_and_reduce_func1( +// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: [[ENTRY:.*]]: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 @@ -889,17 +889,17 @@ void test() { // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex.0", ptr [[TMP9]], i64 1 -// 
CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] -// CHECK1: .shuffle.pre_cond: -// CHECK1-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] -// CHECK1-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] +// CHECK1-NEXT: br [[DOTSHUFFLE_PRE_COND:label %.*]] +// CHECK1: [[_SHUFFLE_PRE_COND:.*:]] +// CHECK1-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], %[[ENTRY]] ], [ [[TMP23:%.*]], %[[DOTSHUFFLE_THEN:.*]] ] +// CHECK1-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT]], %[[ENTRY]] ], [ [[TMP24:%.*]], %[[DOTSHUFFLE_THEN]] ] // CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 // CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] -// CHECK1: .shuffle.then: +// CHECK1-NEXT: br i1 [[TMP18]], label %[[DOTSHUFFLE_THEN]], [[DOTSHUFFLE_EXIT:label %.*]] +// CHECK1: [[_SHUFFLE_THEN:.*:]] // CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK1-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 @@ -907,8 +907,8 @@ void test() { // CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 // CHECK1-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 -// CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] -// CHECK1: .shuffle.exit: +// CHECK1-NEXT: br [[DOTSHUFFLE_PRE_COND]] +// CHECK1: [[_SHUFFLE_EXIT:.*:]] // CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 // CHECK1-NEXT: [[TMP26:%.*]] = icmp eq 
i16 [[TMP7]], 1 @@ -922,33 +922,33 @@ void test() { // CHECK1-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] // CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] // CHECK1-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] -// CHECK1-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: +// CHECK1-NEXT: br i1 [[TMP36]], label %[[THEN:.*]], label %[[ELSE:.*]] +// CHECK1: [[THEN]]: // CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: +// CHECK1-NEXT: br label %[[IFCONT:.*]] +// CHECK1: [[ELSE]]: +// CHECK1-NEXT: br label %[[IFCONT]] +// CHECK1: [[IFCONT]]: // CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] // CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK1: then4: +// CHECK1-NEXT: br i1 [[TMP39]], label %[[THEN4:.*]], label %[[ELSE5:.*]] +// CHECK1: [[THEN4]]: // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP43]], ptr align 8 [[TMP41]], i64 16, i1 false) -// CHECK1-NEXT: br label [[IFCONT6:%.*]] -// CHECK1: else5: -// CHECK1-NEXT: br label [[IFCONT6]] -// CHECK1: ifcont6: +// CHECK1-NEXT: br label %[[IFCONT6:.*]] +// CHECK1: [[ELSE5]]: +// CHECK1-NEXT: br label %[[IFCONT6]] +// CHECK1: [[IFCONT6]]: // CHECK1-NEXT: ret void // // -// 
CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2 -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @_omp_reduction_inter_warp_copy_func2( +// CHECK1-SAME: ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 @@ -961,154 +961,189 @@ void test() { // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: br label [[PRECOND:%.*]] -// CHECK1: precond: +// CHECK1-NEXT: br label %[[PRECOND:.*]] +// CHECK1: [[PRECOND]]: // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 4 -// CHECK1-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK1: body: +// CHECK1-NEXT: br i1 [[TMP7]], label %[[BODY:.*]], label %[[EXIT:.*]] +// CHECK1: [[BODY]]: // CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: +// CHECK1-NEXT: br i1 [[WARP_MASTER]], label %[[THEN:.*]], label %[[ELSE:.*]] +// CHECK1: [[THEN]]: // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) 
@__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: +// CHECK1-NEXT: br label %[[IFCONT:.*]] +// CHECK1: [[ELSE]]: +// CHECK1-NEXT: br label %[[IFCONT]] +// CHECK1: [[IFCONT]]: // CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]] -// CHECK1: then3: +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label %[[THEN3:.*]], label %[[ELSE4:.*]] +// CHECK1: [[THEN3]]: // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]] // CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 // CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 -// CHECK1-NEXT: br label [[IFCONT5:%.*]] -// CHECK1: else4: -// CHECK1-NEXT: br label [[IFCONT5]] -// CHECK1: ifcont5: +// CHECK1-NEXT: br label %[[IFCONT5:.*]] +// CHECK1: [[ELSE4]]: +// CHECK1-NEXT: br label %[[IFCONT5]] +// CHECK1: [[IFCONT5]]: // CHECK1-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: br label [[PRECOND]] -// CHECK1: exit: +// CHECK1-NEXT: br 
label %[[PRECOND]] +// CHECK1: [[EXIT]]: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8]] { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper( +// CHECK1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8]] { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA30]] -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA15]] +// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[SHORT_TBAA27]] +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[INT_TBAA10]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA32]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA6]] // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA32]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[ANYPTR_TBAA6]] // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA45:![0-9]+]] +// 
CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_ -// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIfEC2ERKfS2_( +// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]] -// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25]] -// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]] +// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]] +// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25]] -// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4, !tbaa 
[[TBAA19]] -// CHECK1-NEXT: store float [[TMP1]], ptr [[__RE_]], align 4, !tbaa [[TBAA27]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]], !nonnull [[META22]], !align [[META23]] +// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4, !tbaa [[FLOAT_TBAA14]] +// CHECK1-NEXT: store float [[TMP1]], ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]] -// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA19]] -// CHECK1-NEXT: store float [[TMP3]], ptr [[__IM_]], align 4, !tbaa [[TBAA29]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[FLOATPTR_TBAA20]], !nonnull [[META22]], !align [[META23]] +// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[FLOAT_TBAA14]] +// CHECK1-NEXT: store float [[TMP3]], ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26]] // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv -// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden float @_ZNKSt7complexIfE4realEv( +// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa 
[[TBAA27]] +// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[FLOAT_TBAA24]] // CHECK1-NEXT: ret float [[TMP0]] // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv -// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden float @_ZNKSt7complexIfE4imagEv( +// CHECK1-SAME: ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIFEPTR_TBAA18]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[FLOAT_TBAA26]] // CHECK1-NEXT: ret float [[TMP0]] // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ -// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden void @_ZNSt7complexIdEC2ERKdS2_( +// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]] -// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40]] -// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]] +// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]] +// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40]] -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[TBAA36]] -// CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[TBAA42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]], !nonnull [[META22]], !align [[META35]] +// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[DOUBLE_TBAA29]] +// CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]] -// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA36]] -// CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[TBAA44]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[DOUBLEPTR_TBAA33]], !nonnull [[META22]], !align [[META35]] +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[DOUBLE_TBAA29]] +// CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[DOUBLE_TBAA38]] // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define 
{{[^@]+}}@_ZNKSt7complexIdE4realEv -// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden double @_ZNKSt7complexIdE4realEv( +// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[DOUBLE_TBAA36]] // CHECK1-NEXT: ret double [[TMP0]] // // -// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv -// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { -// CHECK1-NEXT: entry: +// CHECK1-LABEL: define linkonce_odr hidden double @_ZNKSt7complexIdE4imagEv( +// CHECK1-SAME: ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]] +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[_ZTSST7COMPLEXIDEPTR_TBAA31]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA44]] +// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa 
[[DOUBLE_TBAA38]] // CHECK1-NEXT: ret double [[TMP0]] // +//. +// CHECK1: [[ANYPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK1: [[META7]] = !{!"any pointer", [[META8:![0-9]+]], i64 0} +// CHECK1: [[META8]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK1: [[META9]] = !{!"Simple C++ TBAA"} +// CHECK1: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +// CHECK1: [[META11]] = !{!"int", [[META8]], i64 0} +// CHECK1: [[INTPTR_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK1: [[META13]] = !{!"p1 int", [[META7]], i64 0} +// CHECK1: [[FLOAT_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +// CHECK1: [[META15]] = !{!"float", [[META8]], i64 0} +// CHECK1: [[ANYPTR_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +// CHECK1: [[META17]] = !{!"any p2 pointer", [[META7]], i64 0} +// CHECK1: [[_ZTSST7COMPLEXIFEPTR_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// CHECK1: [[META19]] = !{!"p1 _ZTSSt7complexIfE", [[META7]], i64 0} +// CHECK1: [[FLOATPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +// CHECK1: [[META21]] = !{!"p1 float", [[META7]], i64 0} +// CHECK1: [[META22]] = !{} +// CHECK1: [[META23]] = !{i64 4} +// CHECK1: [[FLOAT_TBAA24]] = !{[[META25:![0-9]+]], [[META15]], i64 0} +// CHECK1: [[META25]] = !{!"_ZTSSt7complexIfE", [[META15]], i64 0, [[META15]], i64 4} +// CHECK1: [[FLOAT_TBAA26]] = !{[[META25]], [[META15]], i64 4} +// CHECK1: [[SHORT_TBAA27]] = !{[[META28:![0-9]+]], [[META28]], i64 0} +// CHECK1: [[META28]] = !{!"short", [[META8]], i64 0} +// CHECK1: [[DOUBLE_TBAA29]] = !{[[META30:![0-9]+]], [[META30]], i64 0} +// CHECK1: [[META30]] = !{!"double", [[META8]], i64 0} +// CHECK1: [[_ZTSST7COMPLEXIDEPTR_TBAA31]] = !{[[META32:![0-9]+]], [[META32]], i64 0} +// CHECK1: [[META32]] = !{!"p1 _ZTSSt7complexIdE", [[META7]], i64 0} +// CHECK1: [[DOUBLEPTR_TBAA33]] = !{[[META34:![0-9]+]], [[META34]], i64 0} +// CHECK1: [[META34]] = !{!"p1 double", [[META7]], i64 0} +// CHECK1: [[META35]] = !{i64 8} +// 
CHECK1: [[DOUBLE_TBAA36]] = !{[[META37:![0-9]+]], [[META30]], i64 0} +// CHECK1: [[META37]] = !{!"_ZTSSt7complexIdE", [[META30]], i64 0, [[META30]], i64 8} +// CHECK1: [[DOUBLE_TBAA38]] = !{[[META37]], [[META30]], i64 8} +//. diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp index 1c6a56239204c..268b39087f4bd 100644 --- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp +++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --version 6 // RUN: %clang_cc1 -x c++ -O1 -fopenmp-version=45 -disable-llvm-optzns -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK // RUN: %clang_cc1 -x c++ -O1 -fopenmp-version=45 -disable-llvm-optzns -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm -fno-inline %s -o - | FileCheck %s --check-prefix=CHECK-NOINLINE // expected-no-diagnostics @@ -15,14 +15,14 @@ void foo() { #endif // CHECK: Function Attrs: mustprogress nounwind -// CHECK-LABEL: define {{[^@]+}}@_Z3foov -// CHECK-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define dso_local void @_Z3foov( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] +// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa 
[[INT_TBAA3:![0-9]+]] // CHECK-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK-NEXT: call void @_Z3foov.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) @@ -31,36 +31,36 @@ void foo() { // // // CHECK: Function Attrs: noinline norecurse nounwind -// CHECK-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define internal void @_Z3foov.omp_outlined( +// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7:![0-9]+]] +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]] // CHECK-NEXT: ret void // // // CHECK: Function Attrs: alwaysinline norecurse nounwind -// CHECK-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-LABEL: define internal void @_Z3foov.omp_outlined.1( +// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]] +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]] // CHECK-NEXT: ret void // // // CHECK-NOINLINE: Function Attrs: mustprogress noinline nounwind -// CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov -// CHECK-NOINLINE-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-LABEL: define dso_local void @_Z3foov( +// CHECK-NOINLINE-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NOINLINE-NEXT: [[ENTRY:.*:]] // CHECK-NOINLINE-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NOINLINE-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NOINLINE-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NOINLINE-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] +// CHECK-NOINLINE-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] // CHECK-NOINLINE-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK-NOINLINE-NEXT: call void @_Z3foov.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK-NOINLINE-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) @@ -69,23 +69,40 @@ void foo() { // // // CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind -// CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined -// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-LABEL: define internal 
void @_Z3foov.omp_outlined( +// CHECK-NOINLINE-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NOINLINE-NEXT: [[ENTRY:.*:]] // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7:![0-9]+]] +// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]] // CHECK-NOINLINE-NEXT: ret void // // // CHECK-NOINLINE: Function Attrs: alwaysinline norecurse nounwind -// CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.1 -// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-LABEL: define internal void @_Z3foov.omp_outlined.1( +// CHECK-NOINLINE-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NOINLINE-NEXT: [[ENTRY:.*:]] // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] -// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]] +// CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[INTPTR_TBAA7]] // 
CHECK-NOINLINE-NEXT: ret void // +//. +// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C++ TBAA"} +// CHECK: [[INTPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"any pointer", [[META5]], i64 0} +//. +// CHECK-NOINLINE: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-NOINLINE: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-NOINLINE: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-NOINLINE: [[META6]] = !{!"Simple C++ TBAA"} +// CHECK-NOINLINE: [[INTPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK-NOINLINE: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0} +// CHECK-NOINLINE: [[META9]] = !{!"any pointer", [[META5]], i64 0} +//. diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp index 82dd07a1a63bb..cddd31da1b7fb 100644 --- a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp +++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 6 // RUN: %clang_cc1 -fopenmp -O1 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK // expected-no-diagnostics #ifndef HEADER @@ -34,31 +34,32 @@ struct S { #endif + // CHECK-LABEL: define noundef i32 @main( // CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: 
[[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1:[0-9]+]]) -// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3:![0-9]+]] -// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] +// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8, !tbaa [[CHARPTR_TBAA7:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr nonnull @.omp_task_entry..2) // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]] -// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[TBAA10:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]] +// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[INT_TBAA11:![0-9]+]] // CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0 // CHECK-NEXT: br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]] // CHECK: [[OMP_IF_THEN]]: -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: [[TMP7:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..4) // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 40 -// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8, !tbaa 
[[TBAA15:![0-9]+]] +// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8, !tbaa [[LONG_TBAA15:![0-9]+]] // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 48 -// CHECK-NEXT: store i64 9, ptr [[TMP9]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 9, ptr [[TMP9]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 56 -// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 72 // CHECK-NEXT: store i64 0, ptr [[TMP11]], align 8 // CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP6]] to i64 @@ -71,32 +72,32 @@ struct S { // CHECK-NEXT: br i1 [[DOTNOT22]], label %[[OMP_IF_END17:.*]], label %[[OMP_IF_THEN2:.*]] // CHECK: [[OMP_IF_THEN2]]: // CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]] -// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]] +// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !tbaa [[CHARPTR_TBAA7]] // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP15]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA17:![0-9]+]] +// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[CHARPTR_TBAA17:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[TBAA19:![0-9]+]] +// CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[CHAR_TBAA19:![0-9]+]] // CHECK-NEXT: [[CONV:%.*]] = 
sext i8 [[TMP17]] to i32 // CHECK-NEXT: [[SUB12:%.*]] = sub i32 [[CONV]], [[TMP14]] // CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[SUB12]] to i64 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV15]], [[IDXPROM]] // CHECK-NEXT: [[SUB16:%.*]] = add nsw i64 [[MUL]], -1 // CHECK-NEXT: [[TMP18:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..6) -// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA20:![0-9]+]] -// CHECK-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[TBAA23:![0-9]+]] +// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] +// CHECK-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[INTPTR_TBAA23:![0-9]+]] // CHECK-NEXT: [[AGG_CAPTURED3_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP19]], i64 8 -// CHECK-NEXT: store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA25:![0-9]+]] -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]] +// CHECK-NEXT: store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[CHARPTR_TBAA25:![0-9]+]] +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK-NEXT: [[TMP21:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 40 -// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 48 -// CHECK-NEXT: store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 56 
-// CHECK-NEXT: store i64 1, ptr [[TMP24]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 1, ptr [[TMP24]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 72 // CHECK-NEXT: store i64 0, ptr [[TMP25]], align 8 // CHECK-NEXT: call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP18]], i32 [[TMP21]], ptr nonnull [[TMP22]], ptr nonnull [[TMP23]], i64 1, i32 1, i32 2, i64 4, i32 1, ptr null) #[[ATTR1]] @@ -111,11 +112,11 @@ struct S { // CHECK-NEXT: call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: [[TMP27:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..8) // CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 40 -// CHECK-NEXT: store i64 0, ptr [[TMP28]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 0, ptr [[TMP28]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 48 -// CHECK-NEXT: store i64 9, ptr [[TMP29]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 9, ptr [[TMP29]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 56 -// CHECK-NEXT: store i64 1, ptr [[TMP30]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 1, ptr [[TMP30]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 72 // CHECK-NEXT: store i64 0, ptr [[TMP31]], align 8 // CHECK-NEXT: call void @__kmpc_taskloop(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP27]], i32 1, ptr nonnull [[TMP28]], ptr nonnull [[TMP29]], i64 1, i32 1, i32 0, i64 0, ptr null) @@ -126,33 +127,160 @@ struct S { // CHECK-NEXT: ret i32 0 // // +// CHECK-LABEL: define internal noundef i32 @.omp_task_entry.( +// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) 
[[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8, !tbaa [[LONG_TBAA28:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8, !tbaa [[LONG_TBAA29:![0-9]+]] +// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP3]], 32 +// CHECK-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT]], 32 +// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I:.*]] +// CHECK: [[OMP_INNER_FOR_COND_I]]: +// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP6]], %[[ENTRY]] ] +// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP5]], [[INDVARS_IV]] +// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +// CHECK-NEXT: br i1 [[CMP_NOT_I]], [[DOTOMP_OUTLINED__1_EXIT:label %.*]], label %[[OMP_INNER_FOR_COND_I]] +// CHECK: [[_OMP_OUTLINED__1_EXIT:.*:]] +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..2( +// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) +// CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[DOTNOT_I]], [[DOTOMP_OUTLINED__EXIT:label %.*]], label %[[OMP_IF_THEN_I:.*]] +// CHECK: [[OMP_IF_THEN_I]]: +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 +// CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[INT_TBAA3]] +// CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 33, i64 80, i64 1, ptr nonnull @.omp_task_entry.) 
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 8, !tbaa [[CHAR_TBAA19]] +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 40 +// CHECK-NEXT: store i64 0, ptr [[TMP7]], align 8, !tbaa [[LONG_TBAA15]] +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 48 +// CHECK-NEXT: store i64 9, ptr [[TMP8]], align 8, !tbaa [[LONG_TBAA15]] +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 56 +// CHECK-NEXT: store i64 1, ptr [[TMP9]], align 8, !tbaa [[LONG_TBAA15]] +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 72 +// CHECK-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK-NEXT: tail call void @__kmpc_taskloop(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP5]], i32 1, ptr nonnull [[TMP7]], ptr nonnull [[TMP8]], i64 1, i32 1, i32 0, i64 0, ptr null) +// CHECK-NEXT: tail call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) +// CHECK-NEXT: tail call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) +// CHECK-NEXT: br [[DOTOMP_OUTLINED__EXIT]] +// CHECK: [[_OMP_OUTLINED__EXIT:.*:]] +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..4( +// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8, !tbaa [[LONG_TBAA28]] +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8, !tbaa [[LONG_TBAA29]] +// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP3]], 32 +// CHECK-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT]], 32 +// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I:.*]] +// CHECK: [[OMP_INNER_FOR_COND_I]]: 
+// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP6]], %[[ENTRY]] ] +// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP5]], [[INDVARS_IV]] +// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +// CHECK-NEXT: br i1 [[CMP_NOT_I]], [[DOTOMP_OUTLINED__3_EXIT:label %.*]], label %[[OMP_INNER_FOR_COND_I]] +// CHECK: [[_OMP_OUTLINED__3_EXIT:.*:]] +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..6( +// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[ANYPTR_TBAA20]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !tbaa [[LONG_TBAA28]] +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8, !tbaa [[LONG_TBAA29]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[INTPTR_TBAA33:![0-9]+]], !alias.scope [[META30]], !nonnull [[META35:![0-9]+]], !align [[META36:![0-9]+]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[INT_TBAA3]], !noalias [[META30]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP8]], 0 +// CHECK-NEXT: br i1 [[CMP_I]], label %[[LAND_LHS_TRUE_I:.*]], [[DOTOMP_OUTLINED__5_EXIT:label %.*]] +// CHECK: [[LAND_LHS_TRUE_I]]: +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !tbaa [[CHARPTR_TBAA37:![0-9]+]], !alias.scope [[META30]], !nonnull [[META35]], !align [[META38:![0-9]+]] +// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8, !tbaa [[CHARPTR_TBAA7]], 
!noalias [[META30]] +// CHECK-NEXT: [[IDXPROM_I:%.*]] = zext nneg i32 [[TMP8]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP11]], i64 [[IDXPROM_I]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8, !tbaa [[CHARPTR_TBAA17]], !noalias [[META30]] +// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP12]], i64 [[IDXPROM_I]] +// CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX5_I]], align 1, !tbaa [[CHAR_TBAA19]], !noalias [[META30]] +// CHECK-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP8]], [[CONV_I]] +// CHECK-NEXT: br i1 [[CMP13_I]], label %[[OMP_INNER_FOR_COND_I:.*]], [[DOTOMP_OUTLINED__5_EXIT]] +// CHECK: [[OMP_INNER_FOR_COND_I]]: +// CHECK-NEXT: [[DOTOMP_IV_0_I:%.*]] = phi i64 [ [[ADD46_I:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP4]], %[[LAND_LHS_TRUE_I]] ] +// CHECK-NEXT: [[CMP16_NOT_I:%.*]] = icmp ugt i64 [[DOTOMP_IV_0_I]], [[TMP6]] +// CHECK-NEXT: [[ADD46_I]] = add nsw i64 [[DOTOMP_IV_0_I]], 1 +// CHECK-NEXT: br i1 [[CMP16_NOT_I]], [[DOTOMP_OUTLINED__5_EXIT]], label %[[OMP_INNER_FOR_COND_I]] +// CHECK: [[_OMP_OUTLINED__5_EXIT:.*:]] +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..8( +// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8, !tbaa [[LONG_TBAA28]] +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8, !tbaa [[LONG_TBAA29]] +// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP3]], 32 +// CHECK-NEXT: [[CONV1_I2:%.*]] = ashr exact i64 [[SEXT]], 32 +// CHECK-NEXT: [[CMP_NOT_I3:%.*]] = icmp ult i64 [[TMP5]], [[CONV1_I2]] +// CHECK-NEXT: br i1 
[[CMP_NOT_I3]], [[DOTOMP_OUTLINED__7_EXIT:label %.*]], label %[[OMP_INNER_FOR_BODY_I:.*]] +// CHECK: [[OMP_INNER_FOR_BODY_I]]: +// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTCANCEL_CONTINUE_I:.*]] ], [ [[CONV1_I2]], %[[ENTRY]] ] +// CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @__kmpc_cancel(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK-NEXT: br i1 [[DOTNOT_I]], label %[[DOTCANCEL_CONTINUE_I]], [[DOTOMP_OUTLINED__7_EXIT]] +// CHECK: [[_CANCEL_CONTINUE_I:.*:]] +// CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @__kmpc_cancellationpoint(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: [[DOTNOT12_I:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +// CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP5]], [[INDVARS_IV_NEXT]] +// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[DOTNOT12_I]], i1 true, i1 [[CMP_NOT_I]] +// CHECK-NEXT: br i1 [[OR_COND]], [[DOTOMP_OUTLINED__7_EXIT]], label %[[OMP_INNER_FOR_BODY_I]] +// CHECK: [[_OMP_OUTLINED__7_EXIT:.*:]] +// CHECK-NEXT: ret i32 0 +// // // CHECK-LABEL: define linkonce_odr void @_ZN1SC2Ei( // CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[C:%.*]]) unnamed_addr #[[ATTR6:[0-9]+]] align 2 { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) -// CHECK-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]] +// CHECK-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 // CHECK-NEXT: br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]] // CHECK: [[OMP_IF_THEN]]: -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa 
[[TBAA35:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa [[INT_TBAA39:![0-9]+]] // CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C_ADDR]], align 4, !tbaa [[INT_TBAA3]] // CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[TMP3]], -1 // CHECK-NEXT: [[TMP4:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..10) -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA20]] -// CHECK-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[TBAA37:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[ANYPTR_TBAA20]] +// CHECK-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[_ZTS1SPTR_TBAA41:![0-9]+]] // CHECK-NEXT: [[AGG_CAPTURED_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 8 -// CHECK-NEXT: store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA23]] +// CHECK-NEXT: store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[INTPTR_TBAA23]] // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 40 -// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 48 // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[SUB4]] to i64 -// CHECK-NEXT: store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[LONG_TBAA15]] // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 56 -// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8, !tbaa [[LONG_TBAA15]] 
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 72 // CHECK-NEXT: store i64 0, ptr [[TMP9]], align 8 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP2]] to i64 @@ -162,4 +290,85 @@ struct S { // CHECK-NEXT: br label %[[OMP_IF_END]] // CHECK: [[OMP_IF_END]]: // CHECK-NEXT: ret void - +// +// +// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..10( +// CHECK-SAME: i32 [[TMP0:%.*]], ptr noalias noundef readonly captures(none) [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[ANYPTR_TBAA20]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !tbaa [[LONG_TBAA28]] +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8, !tbaa [[LONG_TBAA29]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[INTPTR_TBAA46:![0-9]+]], !alias.scope [[META43]], !nonnull [[META35]], !align [[META36]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[INT_TBAA3]], !noalias [[META43]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP9]], 0 +// CHECK-NEXT: br i1 [[CMP_I]], label %[[TASKLOOP_IF_THEN_I:.*]], [[DOTOMP_OUTLINED__9_EXIT:label %.*]] +// CHECK: [[TASKLOOP_IF_THEN_I]]: +// CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP4]], 32 +// CHECK-NEXT: [[TMP10:%.*]] = ashr exact i64 [[SEXT]], 32 +// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I:.*]] +// CHECK: [[OMP_INNER_FOR_COND_I]]: +// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[OMP_INNER_FOR_COND_I]] ], [ [[TMP10]], %[[TASKLOOP_IF_THEN_I]] ] +// CHECK-NEXT: [[CMP8_NOT_I:%.*]] = icmp ult i64 [[TMP6]], 
[[INDVARS_IV]] +// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +// CHECK-NEXT: br i1 [[CMP8_NOT_I]], [[DOTOMP_OUTLINED__9_EXIT]], label %[[OMP_INNER_FOR_COND_I]] +// CHECK: [[_OMP_OUTLINED__9_EXIT:.*:]] +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define internal void @_GLOBAL__sub_I_taskloop_strictmodifier_codegen.cpp( +// CHECK-SAME: ) #[[ATTR7:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @_ZN1SC2Ei(ptr noundef nonnull align 4 dereferenceable(4) @s, i32 noundef 1) +// CHECK-NEXT: ret void +// +//. +// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK: [[META6]] = !{!"Simple C++ TBAA"} +// CHECK: [[CHARPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"p2 omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"any p2 pointer", [[META10:![0-9]+]], i64 0} +// CHECK: [[META10]] = !{!"any pointer", [[META5]], i64 0} +// CHECK: [[INT_TBAA11]] = !{[[META12:![0-9]+]], [[META4]], i64 40} +// CHECK: [[META12]] = !{!"_ZTS24kmp_task_t_with_privates", [[META13:![0-9]+]], i64 0, [[META14:![0-9]+]], i64 40} +// CHECK: [[META13]] = !{!"_ZTS10kmp_task_t", [[META10]], i64 0, [[META10]], i64 8, [[META4]], i64 16, [[META5]], i64 24, [[META5]], i64 32} +// CHECK: [[META14]] = !{!"_ZTS15.kmp_privates.t", [[META4]], i64 0} +// CHECK: [[LONG_TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// CHECK: [[META16]] = !{!"long", [[META5]], i64 0} +// CHECK: [[CHARPTR_TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} +// CHECK: [[META18]] = !{!"p1 omnipotent char", [[META10]], i64 0} +// CHECK: [[CHAR_TBAA19]] = !{[[META5]], [[META5]], i64 0} +// CHECK: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META10]], i64 0} +// CHECK: [[META21]] = !{!"_ZTS24kmp_task_t_with_privates", 
[[META22:![0-9]+]], i64 0} +// CHECK: [[META22]] = !{!"_ZTS10kmp_task_t", [[META10]], i64 0, [[META10]], i64 8, [[META4]], i64 16, [[META5]], i64 24, [[META5]], i64 32, [[META16]], i64 40, [[META16]], i64 48, [[META16]], i64 56, [[META4]], i64 64, [[META10]], i64 72} +// CHECK: [[INTPTR_TBAA23]] = !{[[META24:![0-9]+]], [[META24]], i64 0} +// CHECK: [[META24]] = !{!"p1 int", [[META10]], i64 0} +// CHECK: [[CHARPTR_TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0} +// CHECK: [[META26]] = !{!"p3 omnipotent char", [[META27:![0-9]+]], i64 0} +// CHECK: [[META27]] = !{!"any p3 pointer", [[META9]], i64 0} +// CHECK: [[LONG_TBAA28]] = !{[[META21]], [[META16]], i64 40} +// CHECK: [[LONG_TBAA29]] = !{[[META21]], [[META16]], i64 48} +// CHECK: [[META30]] = !{[[META31:![0-9]+]]} +// CHECK: [[META31]] = distinct !{[[META31]], [[META32:![0-9]+]], !".omp_outlined..5: %__context"} +// CHECK: [[META32]] = distinct !{[[META32]], !".omp_outlined..5"} +// CHECK: [[INTPTR_TBAA33]] = !{[[META34:![0-9]+]], [[META24]], i64 0} +// CHECK: [[META34]] = !{!"_ZTSZ4mainE3$_3", [[META24]], i64 0, [[META26]], i64 8} +// CHECK: [[META35]] = !{} +// CHECK: [[META36]] = !{i64 4} +// CHECK: [[CHARPTR_TBAA37]] = !{[[META34]], [[META26]], i64 8} +// CHECK: [[META38]] = !{i64 8} +// CHECK: [[INT_TBAA39]] = !{[[META40:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META40]] = !{!"_ZTS1S", [[META4]], i64 0} +// CHECK: [[_ZTS1SPTR_TBAA41]] = !{[[META42:![0-9]+]], [[META42]], i64 0} +// CHECK: [[META42]] = !{!"p1 _ZTS1S", [[META10]], i64 0} +// CHECK: [[META43]] = !{[[META44:![0-9]+]]} +// CHECK: [[META44]] = distinct !{[[META44]], [[META45:![0-9]+]], !".omp_outlined..9: %__context"} +// CHECK: [[META45]] = distinct !{[[META45]], !".omp_outlined..9"} +// CHECK: [[INTPTR_TBAA46]] = !{[[META47:![0-9]+]], [[META24]], i64 8} +// CHECK: [[META47]] = !{!"_ZTSZN1SC1EiEUt_", [[META42]], i64 0, [[META24]], i64 8} +//. 
From 6885950931b8dd7a8c956bc1f3e8b8ac52dff8d2 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 12 Sep 2025 09:39:57 -0700 Subject: [PATCH 154/734] [SCEV] Fix a hang introduced by collectForPHI (#158153) If we have a phi where one of it's source blocks is an unreachable block, we don't want to traverse back into the unreachable region. Doing so allows e.g. finding a trivial self loop when walking back the predecessor chain. --- llvm/lib/Analysis/ScalarEvolution.cpp | 9 +++++++ ...t-guard-info-with-multiple-predecessors.ll | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index ebb863076d2c5..5bcafd96f1aa5 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15457,6 +15457,12 @@ void ScalarEvolution::LoopGuards::collectFromPHI( const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx); if (!VisitedBlocks.insert(InBlock).second) return {nullptr, scCouldNotCompute}; + + // Avoid analyzing unreachable blocks so that we don't get trapped + // traversing cycles with ill-formed dominance or infinite cycles + if (!SE.DT.isReachableFromEntry(InBlock)) + return {nullptr, scCouldNotCompute}; + auto [G, Inserted] = IncomingGuards.try_emplace(InBlock, LoopGuards(SE)); if (Inserted) collectFromBlock(SE, G->second, Phi.getParent(), InBlock, VisitedBlocks, @@ -15511,6 +15517,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock( ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, const BasicBlock *Block, const BasicBlock *Pred, SmallPtrSetImpl &VisitedBlocks, unsigned Depth) { + + assert(SE.DT.isReachableFromEntry(Block) && SE.DT.isReachableFromEntry(Pred)); + SmallVector ExprsToRewrite; auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS, diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll 
b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll index 28035b05303db..564ce6b7d622f 100644 --- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll +++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll @@ -364,3 +364,29 @@ body: exit: ret void } + +define void @hang_due_to_unreachable_phi_inblock() personality ptr null { +bb: + br label %bb6 + +self-loop: ; preds = %self-loop + %dead = invoke ptr null() + to label %self-loop unwind label %bb4 + +bb4: ; preds = %self-loop + %i5 = landingpad { ptr, i32 } + cleanup + br label %bb6 + +bb6: ; preds = %bb4, %bb + %i7 = phi ptr [ null, %bb4 ], [ null, %bb ] + br label %bb8 + +bb8: ; preds = %bb8, %bb6 + %i9 = phi ptr [ null, %bb8 ], [ null, %bb6 ] + %i11 = icmp eq ptr %i9, null + br i1 %i11, label %bb12, label %bb8 + +bb12: ; preds = %bb8, %bb6 + ret void +} From bd7c2f15e8b9ff09cd415e7f8d01117cb0296e6e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 12 Sep 2025 09:45:53 -0700 Subject: [PATCH 155/734] [ADT] Simplify PointerBitMask in PointerIntPair.h (NFC) (#158210) A left shift of (uintptr_t)-1) is simpler. --- llvm/include/llvm/ADT/PointerIntPair.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h index 9cfc65846d5bf..e48e35d476c80 100644 --- a/llvm/include/llvm/ADT/PointerIntPair.h +++ b/llvm/include/llvm/ADT/PointerIntPair.h @@ -173,8 +173,7 @@ struct PointerIntPairInfo { "PointerIntPair with integer size too large for pointer"); enum MaskAndShiftConstants : uintptr_t { /// PointerBitMask - The bits that come from the pointer. - PointerBitMask = - ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1), + PointerBitMask = (~(uintptr_t)0) << PtrTraits::NumLowBitsAvailable, /// IntShift - The number of low bits that we reserve for other uses, and /// keep zero. 
From 615d07ea55ea57afab0205aa739239070448a038 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 12 Sep 2025 09:51:17 -0700 Subject: [PATCH 156/734] [lit] Implement ulimit builtin This patch implements ulimit inside the lit internal shell. Implementation wise, this functions similar to umask. But instead of setting the limits within the lit test worker process, we set environment variables and add a wrapper around the command to be executed. The wrapper then sets the limits. This is because we cannot increase the limits after lowering them, so we would otherwise end up with a lit test worker stuck with a lower limit. There are several tests where the use of ulimit is essential to the semantics of the test (two in clang, ~7 in compiler-rt), so we need to implement this in order to switch on the internal shell by default without losing test coverage. Reviewers: cmtice, petrhosek, ilovepi Reviewed By: cmtice, ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/157958 --- llvm/utils/lit/lit/TestRunner.py | 38 ++++++++++++++++++- .../builtin_commands/_launch_with_limit.py | 25 ++++++++++++ .../lit/tests/Inputs/shtest-ulimit/lit.cfg | 8 ++++ .../Inputs/shtest-ulimit/print_limits.py | 4 ++ .../Inputs/shtest-ulimit/ulimit-bad-arg.txt | 1 + .../Inputs/shtest-ulimit/ulimit_okay.txt | 5 +++ llvm/utils/lit/tests/shtest-ulimit.py | 18 +++++++++ 7 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt create mode 100644 llvm/utils/lit/tests/shtest-ulimit.py diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index a769919558a47..90c2c6479b004 100644 --- 
a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -92,11 +92,12 @@ class ShellEnvironment(object): we maintain a dir stack for pushd/popd. """ - def __init__(self, cwd, env, umask=-1): + def __init__(self, cwd, env, umask=-1, ulimit={}): self.cwd = cwd self.env = dict(env) self.umask = umask self.dirStack = [] + self.ulimit = ulimit def change_dir(self, newdir): if os.path.isabs(newdir): @@ -595,6 +596,27 @@ def executeBuiltinUmask(cmd, shenv): return ShellCommandResult(cmd, "", "", 0, False) +def executeBuiltinUlimit(cmd, shenv): + """executeBuiltinUlimit - Change the current limits.""" + if os.name != "posix": + raise InternalShellError(cmd, "'ulimit' not supported on this system") + if len(cmd.args) != 3: + raise InternalShellError(cmd, "'ulimit' requires two arguments") + try: + new_limit = int(cmd.args[2]) + except ValueError as err: + raise InternalShellError(cmd, "Error: 'ulimit': %s" % str(err)) + if cmd.args[1] == "-v": + shenv.ulimit["RLIMIT_AS"] = new_limit * 1024 + elif cmd.args[1] == "-n": + shenv.ulimit["RLIMIT_NOFILE"] = new_limit + else: + raise InternalShellError( + cmd, "'ulimit' does not support option: %s" % cmd.args[1] + ) + return ShellCommandResult(cmd, "", "", 0, False) + + def executeBuiltinColon(cmd, cmd_shenv): """executeBuiltinColon - Discard arguments and exit with status 0.""" return ShellCommandResult(cmd, "", "", 0, False) @@ -749,6 +771,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): "popd": executeBuiltinPopd, "pushd": executeBuiltinPushd, "rm": executeBuiltinRm, + "ulimit": executeBuiltinUlimit, "umask": executeBuiltinUmask, ":": executeBuiltinColon, } @@ -914,6 +937,19 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): if kIsWindows: args = quote_windows_command(args) + # Handle any resource limits. We do this by launching the command with + # a wrapper that sets the necessary limits. 
We use a wrapper rather than + # setting the limits in process as we cannot reraise the limits back to + # their defaults without elevated permissions. + if cmd_shenv.ulimit: + executable = sys.executable + args.insert(0, sys.executable) + args.insert(1, os.path.join(builtin_commands_dir, "_launch_with_limit.py")) + for limit in cmd_shenv.ulimit: + cmd_shenv.env["LIT_INTERNAL_ULIMIT_" + limit] = str( + cmd_shenv.ulimit[limit] + ) + try: # TODO(boomanaiden154): We currently wrap the subprocess.Popen with # os.umask as the umask argument in subprocess.Popen is not diff --git a/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py new file mode 100644 index 0000000000000..33d2d59ff0dbe --- /dev/null +++ b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py @@ -0,0 +1,25 @@ +import sys +import subprocess +import resource +import os + +ULIMIT_ENV_VAR_PREFIX = "LIT_INTERNAL_ULIMIT_" + + +def main(argv): + command_args = argv[1:] + for env_var in os.environ: + if env_var.startswith(ULIMIT_ENV_VAR_PREFIX): + limit_str = env_var[len(ULIMIT_ENV_VAR_PREFIX) :] + limit_value = int(os.environ[env_var]) + limit = (limit_value, limit_value) + if limit_str == "RLIMIT_AS": + resource.setrlimit(resource.RLIMIT_AS, limit) + elif limit_str == "RLIMIT_NOFILE": + resource.setrlimit(resource.RLIMIT_NOFILE, limit) + process_output = subprocess.run(command_args) + sys.exit(process_output.returncode) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg new file mode 100644 index 0000000000000..c7bdc7e7b6bc0 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg @@ -0,0 +1,8 @@ +import lit.formats + +config.name = "shtest-ulimit" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest(execute_external=False) +config.test_source_root = None +config.test_exec_root = None 
+config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py new file mode 100644 index 0000000000000..632f954fa8fde --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py @@ -0,0 +1,4 @@ +import resource + +print("RLIMIT_AS=" + str(resource.getrlimit(resource.RLIMIT_AS)[0])) +print("RLIMIT_NOFILE=" + str(resource.getrlimit(resource.RLIMIT_NOFILE)[0])) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt new file mode 100644 index 0000000000000..efa22881047e9 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt @@ -0,0 +1 @@ +# RUN: ulimit -n diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt new file mode 100644 index 0000000000000..ad353b5d7c459 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt @@ -0,0 +1,5 @@ +# RUN: ulimit -v 1048576 +# RUN: ulimit -n 50 +# RUN: %{python} %S/print_limits.py +# Fail the test so that we can assert on the output. +# RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py new file mode 100644 index 0000000000000..8d7f436dc8af2 --- /dev/null +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -0,0 +1,18 @@ +# Check the ulimit command + +# ulimit does not work on non-POSIX platforms. 
+# UNSUPPORTED: system-windows + +# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s + +# CHECK: -- Testing: 2 tests{{.*}} + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}}) +# CHECK: ulimit -n +# CHECK: 'ulimit' requires two arguments + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) +# CHECK: ulimit -v 1048576 +# CHECK: ulimit -n 50 +# CHECK: RLIMIT_AS=1073741824 +# CHECK: RLIMIT_NOFILE=50 From ba9d1c41c41d568a798e0a8c38a89d294647c28d Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Fri, 12 Sep 2025 13:55:38 -0300 Subject: [PATCH 157/734] [clang] AST: remove DependentTemplateSpecializationType (#158109) A DependentTemplateSpecializationType (DTST) is basically just a TemplateSpecializationType (TST) with a hardcoded DependentTemplateName (DTN) as its TemplateName. This removes the DTST and replaces all uses of it with a TST, removing a lot of duplication in the implementation. Technically the hardcoded DTN is an optimization for a most common case, but the TST implementation is in better shape overall and with other optimizations, so this patch ends up being an overall performance positive: image A DTST also didn't allow a template name representing a DTN that was substituted, such as from an alias template, while the TST does allow it by the simple fact it can hold an arbitrary TemplateName, so this patch also increases the amount of sugar retained, while still being faster overall. Example (from included test case): ```C++ template class TT> using T1 = TT; template using T2 = T1; ``` Here we can now represent in the AST that `TT` was substituted for the dependent template name `T::template X`. 
--- .../utils/RenamerClangTidyCheck.cpp | 2 + clang-tools-extra/clangd/FindTarget.cpp | 28 +- .../clangd/SemanticHighlighting.cpp | 16 +- .../clangd/unittests/FindTargetTests.cpp | 3 +- .../include-cleaner/lib/WalkAST.cpp | 2 + clang/docs/ReleaseNotes.rst | 4 +- clang/include/clang/AST/ASTContext.h | 13 +- clang/include/clang/AST/ASTNodeTraverser.h | 5 - clang/include/clang/AST/RecursiveASTVisitor.h | 16 - clang/include/clang/AST/TemplateName.h | 4 +- clang/include/clang/AST/TypeBase.h | 64 +- clang/include/clang/AST/TypeLoc.h | 128 ---- clang/include/clang/AST/TypeProperties.td | 35 - clang/include/clang/ASTMatchers/ASTMatchers.h | 12 - clang/include/clang/Basic/TypeNodes.td | 10 +- clang/include/clang/Sema/HeuristicResolver.h | 2 +- clang/include/clang/Sema/Sema.h | 7 +- .../clang/Serialization/TypeBitCodes.def | 1 - clang/lib/AST/ASTContext.cpp | 155 +--- clang/lib/AST/ASTImporter.cpp | 19 - clang/lib/AST/ASTStructuralEquivalence.cpp | 14 - clang/lib/AST/ASTTypeTraits.cpp | 4 - clang/lib/AST/DeclTemplate.cpp | 2 + clang/lib/AST/ItaniumMangle.cpp | 51 +- clang/lib/AST/MicrosoftMangle.cpp | 6 - clang/lib/AST/ODRHash.cpp | 10 - clang/lib/AST/TemplateName.cpp | 18 +- clang/lib/AST/Type.cpp | 51 +- clang/lib/AST/TypeLoc.cpp | 32 - clang/lib/AST/TypePrinter.cpp | 17 - clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 2 - clang/lib/ASTMatchers/Dynamic/Registry.cpp | 1 - clang/lib/Sema/HeuristicResolver.cpp | 13 +- clang/lib/Sema/SemaAttr.cpp | 4 +- clang/lib/Sema/SemaCXXScopeSpec.cpp | 57 +- clang/lib/Sema/SemaCoroutine.cpp | 6 +- clang/lib/Sema/SemaDecl.cpp | 6 - clang/lib/Sema/SemaDeclCXX.cpp | 8 +- clang/lib/Sema/SemaExpr.cpp | 5 +- clang/lib/Sema/SemaLookup.cpp | 16 +- clang/lib/Sema/SemaTemplate.cpp | 296 +++----- clang/lib/Sema/SemaTemplateDeduction.cpp | 49 +- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 5 +- clang/lib/Sema/SemaType.cpp | 9 - clang/lib/Sema/TreeTransform.h | 153 +--- clang/lib/Serialization/ASTReader.cpp | 14 - 
clang/lib/Serialization/ASTWriter.cpp | 13 - .../Refactoring/Rename/USRLocFinder.cpp | 6 - clang/lib/Tooling/Syntax/BuildTree.cpp | 7 - clang/test/AST/ast-dump-templates.cpp | 703 +++++++++++++----- clang/tools/libclang/CIndex.cpp | 13 - clang/tools/libclang/CXIndexDataConsumer.cpp | 2 - clang/unittests/AST/ASTImporterTest.cpp | 4 +- .../ASTMatchers/ASTMatchersNodeTest.cpp | 2 +- .../TypeSystem/Clang/TypeSystemClang.cpp | 8 - 55 files changed, 800 insertions(+), 1333 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index 90539eaabbe03..24d346bdfaa53 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -350,6 +350,8 @@ class RenamerClangTidyVisitor const TemplateDecl *Decl = Loc.getTypePtr()->getTemplateName().getAsTemplateDecl( /*IgnoreDeduced=*/true); + if (!Decl) + return true; if (const auto *ClassDecl = dyn_cast(Decl)) if (const NamedDecl *TemplDecl = ClassDecl->getTemplatedDecl()) diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index 32018d1bf3a84..8aae41420b83e 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -406,15 +406,6 @@ struct TargetFinder { } } } - void VisitDependentTemplateSpecializationType( - const DependentTemplateSpecializationType *DTST) { - if (Outer.Resolver) { - for (const NamedDecl *ND : - Outer.Resolver->resolveTemplateSpecializationType(DTST)) { - Outer.add(ND, Flags); - } - } - } void VisitTypedefType(const TypedefType *TT) { if (shouldSkipTypedef(TT->getDecl())) return; @@ -455,11 +446,13 @@ struct TargetFinder { // class template specializations have a (specialized) CXXRecordDecl. else if (const CXXRecordDecl *RD = TST->getAsCXXRecordDecl()) Outer.add(RD, Flags); // add(Decl) will despecialize if needed. 
- else { + else if (auto *TD = TST->getTemplateName().getAsTemplateDecl()) // fallback: the (un-specialized) declaration from primary template. - if (auto *TD = TST->getTemplateName().getAsTemplateDecl()) - Outer.add(TD->getTemplatedDecl(), Flags | Rel::TemplatePattern); - } + Outer.add(TD->getTemplatedDecl(), Flags | Rel::TemplatePattern); + else if (Outer.Resolver) + for (const NamedDecl *ND : + Outer.Resolver->resolveTemplateSpecializationType(TST)) + Outer.add(ND, Flags); } void VisitSubstTemplateTypeParmType(const SubstTemplateTypeParmType *STTPT) { @@ -900,15 +893,6 @@ refInTypeLoc(TypeLoc L, const HeuristicResolver *Resolver) { DeclRelation::Alias, Resolver)}); } - void VisitDependentTemplateSpecializationTypeLoc( - DependentTemplateSpecializationTypeLoc L) { - Refs.push_back( - ReferenceLoc{L.getQualifierLoc(), L.getTemplateNameLoc(), - /*IsDecl=*/false, - explicitReferenceTargets( - DynTypedNode::create(L.getType()), {}, Resolver)}); - } - void VisitDependentNameTypeLoc(DependentNameTypeLoc L) { Refs.push_back( ReferenceLoc{L.getQualifierLoc(), L.getNameLoc(), diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index 2b151b1274428..ab720ebe6b47f 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -728,11 +728,6 @@ class CollectExtraHighlightings return true; } - bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) { - H.addAngleBracketTokens(L.getLAngleLoc(), L.getRAngleLoc()); - return true; - } - bool VisitFunctionDecl(FunctionDecl *D) { if (D->isOverloadedOperator()) { const auto AddOpDeclToken = [&](SourceLocation Loc) { @@ -1087,11 +1082,12 @@ class CollectExtraHighlightings return true; } - bool VisitDependentTemplateSpecializationTypeLoc( - DependentTemplateSpecializationTypeLoc L) { - H.addToken(L.getTemplateNameLoc(), HighlightingKind::Type) - .addModifier(HighlightingModifier::DependentName) - 
.addModifier(HighlightingModifier::ClassScope); + bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) { + if (!L.getTypePtr()->getTemplateName().getAsTemplateDecl( + /*IgnoreDeduced=*/true)) + H.addToken(L.getTemplateNameLoc(), HighlightingKind::Type) + .addModifier(HighlightingModifier::DependentName) + .addModifier(HighlightingModifier::ClassScope); H.addAngleBracketTokens(L.getLAngleLoc(), L.getRAngleLoc()); return true; } diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index f369e1b0341e8..dd26182630ae1 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -1029,8 +1029,7 @@ TEST_F(TargetDeclTest, DependentTypes) { template void foo(typename A::template [[B]]); )cpp"; - EXPECT_DECLS("DependentTemplateSpecializationTypeLoc", - "template struct B"); + EXPECT_DECLS("TemplateSpecializationTypeLoc", "template struct B"); // Dependent name with recursive definition. We don't expect a // result, but we shouldn't get into a stack overflow either. diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp index 0cbf9a080a3ce..7bbdc8ba00dca 100644 --- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp @@ -321,6 +321,8 @@ class ASTWalker : public RecursiveASTVisitor { // TypeLoc visitors. void reportType(SourceLocation RefLoc, NamedDecl *ND) { + if (!ND) + return; // Reporting explicit references to types nested inside classes can cause // issues, e.g. a type accessed through a derived class shouldn't require // inclusion of the base. 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4868714d898ec..51e5973098c14 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -458,7 +458,9 @@ AST Matchers following the corresponding changes in the clang AST. - Ensure ``hasBitWidth`` doesn't crash on bit widths that are dependent on template parameters. - +- Remove the ``dependentTemplateSpecializationType`` matcher, as the + corresponding AST node was removed. This matcher was never very useful, since + there was no way to match on its template name. - Add a boolean member ``IgnoreSystemHeaders`` to ``MatchFinderOptions``. This allows it to ignore nodes in system headers when traversing the AST. diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 1c17333b722f8..b8f6de69bbb98 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -241,9 +241,6 @@ class ASTContext : public RefCountedBase { mutable llvm::FoldingSet UsingTypes; mutable llvm::FoldingSet> TypedefTypes; mutable llvm::FoldingSet DependentNameTypes; - mutable llvm::DenseMap - DependentTemplateSpecializationTypes; mutable llvm::FoldingSet PackExpansionTypes; mutable llvm::FoldingSet ObjCObjectTypes; mutable llvm::FoldingSet ObjCObjectPointerTypes; @@ -1904,7 +1901,8 @@ class ASTContext : public RefCountedBase { TemplateTypeParmDecl *ParmDecl = nullptr) const; QualType getCanonicalTemplateSpecializationType( - TemplateName T, ArrayRef CanonicalArgs) const; + ElaboratedTypeKeyword Keyword, TemplateName T, + ArrayRef CanonicalArgs) const; QualType getTemplateSpecializationType(ElaboratedTypeKeyword Keyword, TemplateName T, @@ -1935,13 +1933,6 @@ class ASTContext : public RefCountedBase { NestedNameSpecifier NNS, const IdentifierInfo *Name) const; - QualType getDependentTemplateSpecializationType( - ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name, - ArrayRef Args) const; - QualType 
getDependentTemplateSpecializationType( - ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name, - ArrayRef Args, bool IsCanonical = false) const; - TemplateArgument getInjectedTemplateArg(NamedDecl *ParamDecl) const; /// Form a pack expansion type with the given pattern. diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index fe08d637a1e1d..ea68cc70f9131 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -533,11 +533,6 @@ class ASTNodeTraverser for (unsigned I=0, N=TL.getNumArgs(); I < N; ++I) dumpTemplateArgumentLoc(TL.getArgLoc(I)); } - void VisitDependentTemplateSpecializationTypeLoc( - DependentTemplateSpecializationTypeLoc TL) { - for (unsigned I=0, N=TL.getNumArgs(); I < N; ++I) - dumpTemplateArgumentLoc(TL.getArgLoc(I)); - } void VisitTypedefDecl(const TypedefDecl *D) { Visit(D->getUnderlyingType()); } diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 02581c8e73299..c1944487716de 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1192,13 +1192,6 @@ DEF_TRAVERSE_TYPE(DependentNameType, { TRY_TO(TraverseNestedNameSpecifier(T->getQualifier())); }) -DEF_TRAVERSE_TYPE(DependentTemplateSpecializationType, { - const DependentTemplateStorage &S = T->getDependentTemplateName(); - if (TraverseQualifier) - TRY_TO(TraverseNestedNameSpecifier(S.getQualifier())); - TRY_TO(TraverseTemplateArguments(T->template_arguments())); -}) - DEF_TRAVERSE_TYPE(TemplateSpecializationType, { if (TraverseQualifier) { TRY_TO(TraverseTemplateName(T->getTemplateName())); @@ -1546,15 +1539,6 @@ DEF_TRAVERSE_TYPELOC(DependentNameType, { TRY_TO(TraverseNestedNameSpecifierLoc(TL.getQualifierLoc())); }) -DEF_TRAVERSE_TYPELOC(DependentTemplateSpecializationType, { - if (TraverseQualifier) - TRY_TO(TraverseNestedNameSpecifierLoc(TL.getQualifierLoc())); 
- - for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) { - TRY_TO(TraverseTemplateArgumentLoc(TL.getArgLoc(I))); - } -}) - DEF_TRAVERSE_TYPELOC(TemplateSpecializationType, { if (TraverseQualifier) TRY_TO(TraverseNestedNameSpecifierLoc(TL.getQualifierLoc())); diff --git a/clang/include/clang/AST/TemplateName.h b/clang/include/clang/AST/TemplateName.h index abb0669bff378..b6999a1b4e9b9 100644 --- a/clang/include/clang/AST/TemplateName.h +++ b/clang/include/clang/AST/TemplateName.h @@ -297,10 +297,10 @@ class TemplateName { /// set of function templates, returns NULL. TemplateDecl *getAsTemplateDecl(bool IgnoreDeduced = false) const; - /// Retrieves the underlying template declaration that + /// Retrieves the underlying template name that /// this template name refers to, along with the /// deduced default arguments, if any. - std::pair + std::pair getTemplateDeclAndDefaultArgs() const; /// Retrieve the underlying, overloaded function template diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h index db2ab04e4471c..9074992a3de8c 100644 --- a/clang/include/clang/AST/TypeBase.h +++ b/clang/include/clang/AST/TypeBase.h @@ -2250,22 +2250,6 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { unsigned NumArgs; }; - class DependentTemplateSpecializationTypeBitfields { - friend class DependentTemplateSpecializationType; - - LLVM_PREFERRED_TYPE(KeywordWrapperBitfields) - unsigned : NumTypeWithKeywordBits; - - /// The number of template arguments named in this class template - /// specialization, which is expected to be able to hold at least 1024 - /// according to [implimits]. However, as this limit is somewhat easy to - /// hit with template metaprogramming we'd prefer to keep it as large - /// as possible. At the moment it has been left as a non-bitfield since - /// this type safely fits in 64 bits as an unsigned, so there is no reason - /// to introduce the performance impact of a bitfield. 
- unsigned NumArgs; - }; - class PackExpansionTypeBitfields { friend class PackExpansionType; @@ -2346,8 +2330,6 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { SubstTemplateTypeParmTypeBitfields SubstTemplateTypeParmTypeBits; SubstPackTypeBitfields SubstPackTypeBits; TemplateSpecializationTypeBitfields TemplateSpecializationTypeBits; - DependentTemplateSpecializationTypeBitfields - DependentTemplateSpecializationTypeBits; PackExpansionTypeBitfields PackExpansionTypeBits; CountAttributedTypeBitfields CountAttributedTypeBits; PresefinedSugarTypeBitfields PredefinedSugarTypeBits; @@ -7366,9 +7348,9 @@ class TemplateSpecializationType : public TypeWithKeyword, } void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Ctx); - static void Profile(llvm::FoldingSetNodeID &ID, TemplateName T, - ArrayRef Args, QualType Underlying, - const ASTContext &Context); + static void Profile(llvm::FoldingSetNodeID &ID, ElaboratedTypeKeyword Keyword, + TemplateName T, ArrayRef Args, + QualType Underlying, const ASTContext &Context); static bool classof(const Type *T) { return T->getTypeClass() == TemplateSpecialization; @@ -7459,46 +7441,6 @@ class DependentNameType : public TypeWithKeyword, public llvm::FoldingSetNode { } }; -/// Represents a template specialization type whose template cannot be -/// resolved, e.g. 
-/// A::template B -class DependentTemplateSpecializationType : public TypeWithKeyword { - friend class ASTContext; // ASTContext creates these - - DependentTemplateStorage Name; - - DependentTemplateSpecializationType(ElaboratedTypeKeyword Keyword, - const DependentTemplateStorage &Name, - ArrayRef Args, - QualType Canon); - -public: - const DependentTemplateStorage &getDependentTemplateName() const { - return Name; - } - - ArrayRef template_arguments() const { - return {reinterpret_cast(this + 1), - DependentTemplateSpecializationTypeBits.NumArgs}; - } - - bool isSugared() const { return false; } - QualType desugar() const { return QualType(this, 0); } - - void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context) { - Profile(ID, Context, getKeyword(), Name, template_arguments()); - } - - static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, - ElaboratedTypeKeyword Keyword, - const DependentTemplateStorage &Name, - ArrayRef Args); - - static bool classof(const Type *T) { - return T->getTypeClass() == DependentTemplateSpecialization; - } -}; - /// Represents a pack expansion of types. /// /// Pack expansions are part of C++11 variadic templates. 
A pack diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index d52e10419e97a..38e8fba569396 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -2598,134 +2598,6 @@ class DependentNameTypeLoc : public ConcreteTypeLoc { -public: - SourceLocation getElaboratedKeywordLoc() const { - return this->getLocalData()->ElaboratedKWLoc; - } - - void setElaboratedKeywordLoc(SourceLocation Loc) { - this->getLocalData()->ElaboratedKWLoc = Loc; - } - - NestedNameSpecifierLoc getQualifierLoc() const { - if (!getLocalData()->QualifierData) - return NestedNameSpecifierLoc(); - - return NestedNameSpecifierLoc( - getTypePtr()->getDependentTemplateName().getQualifier(), - getLocalData()->QualifierData); - } - - void setQualifierLoc(NestedNameSpecifierLoc QualifierLoc) { - if (!QualifierLoc) { - // Even if we have a nested-name-specifier in the dependent - // template specialization type, we won't record the nested-name-specifier - // location information when this type-source location information is - // part of a nested-name-specifier. 
- getLocalData()->QualifierData = nullptr; - return; - } - - assert(QualifierLoc.getNestedNameSpecifier() == - getTypePtr()->getDependentTemplateName().getQualifier() && - "Inconsistent nested-name-specifier pointer"); - getLocalData()->QualifierData = QualifierLoc.getOpaqueData(); - } - - SourceLocation getTemplateKeywordLoc() const { - return getLocalData()->TemplateKWLoc; - } - - void setTemplateKeywordLoc(SourceLocation Loc) { - getLocalData()->TemplateKWLoc = Loc; - } - - SourceLocation getTemplateNameLoc() const { - return this->getLocalData()->NameLoc; - } - - void setTemplateNameLoc(SourceLocation Loc) { - this->getLocalData()->NameLoc = Loc; - } - - SourceLocation getLAngleLoc() const { - return this->getLocalData()->LAngleLoc; - } - - void setLAngleLoc(SourceLocation Loc) { - this->getLocalData()->LAngleLoc = Loc; - } - - SourceLocation getRAngleLoc() const { - return this->getLocalData()->RAngleLoc; - } - - void setRAngleLoc(SourceLocation Loc) { - this->getLocalData()->RAngleLoc = Loc; - } - - unsigned getNumArgs() const { - return getTypePtr()->template_arguments().size(); - } - - void setArgLocInfo(unsigned i, TemplateArgumentLocInfo AI) { - getArgInfos()[i] = AI; - } - - TemplateArgumentLocInfo getArgLocInfo(unsigned i) const { - return getArgInfos()[i]; - } - - TemplateArgumentLoc getArgLoc(unsigned i) const { - return TemplateArgumentLoc(getTypePtr()->template_arguments()[i], - getArgLocInfo(i)); - } - - SourceRange getLocalSourceRange() const { - if (getElaboratedKeywordLoc().isValid()) - return SourceRange(getElaboratedKeywordLoc(), getRAngleLoc()); - else if (getQualifierLoc()) - return SourceRange(getQualifierLoc().getBeginLoc(), getRAngleLoc()); - else if (getTemplateKeywordLoc().isValid()) - return SourceRange(getTemplateKeywordLoc(), getRAngleLoc()); - else - return SourceRange(getTemplateNameLoc(), getRAngleLoc()); - } - - void copy(DependentTemplateSpecializationTypeLoc Loc) { - unsigned size = getFullDataSize(); - assert(size == 
Loc.getFullDataSize()); - memcpy(Data, Loc.Data, size); - } - - void initializeLocal(ASTContext &Context, SourceLocation Loc); - - unsigned getExtraLocalDataSize() const { - return getNumArgs() * sizeof(TemplateArgumentLocInfo); - } - - unsigned getExtraLocalDataAlignment() const { - return alignof(TemplateArgumentLocInfo); - } - -private: - TemplateArgumentLocInfo *getArgInfos() const { - return static_cast(getExtraLocalData()); - } -}; - struct PackExpansionTypeLocInfo { SourceLocation EllipsisLoc; }; diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 185a968217f97..b3932a67db69d 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -729,41 +729,6 @@ let Class = TemplateSpecializationType in { }]>; } -let Class = DependentTemplateSpecializationType in { - def : ReadHelper<[{ - const auto &dtn = node->getDependentTemplateName(); - auto name = dtn.getName(); - }]>; - - def : Property<"qualifier", NestedNameSpecifier> { - let Read = [{ dtn.getQualifier() }]; - } - def : Property<"identifier", Optional> { - let Read = [{ makeOptionalFromPointer(name.getIdentifier()) }]; - } - def : Property<"operatorKind", OverloadedOperatorKind> { - let Conditional = [{ !identifier }]; - let Read = [{ name.getOperator() }]; - } - def : Property<"HasTemplateKeyword", Bool> { - let Read = [{ dtn.hasTemplateKeyword() }]; - } - - def : Property<"keyword", ElaboratedTypeKeyword> { - let Read = [{ node->getKeyword() }]; - } - def : Property<"templateArguments", Array> { - let Read = [{ node->template_arguments() }]; - } - - def : Creator<[{ - DependentTemplateStorage S(qualifier, identifier ? 
IdentifierOrOverloadedOperator(*identifier) : - IdentifierOrOverloadedOperator(*operatorKind), - HasTemplateKeyword); - return ctx.getDependentTemplateSpecializationType(keyword, S, templateArguments); - }]>; -} - let Class = TemplateTypeParmType in { def : Property<"depth", UInt32> { let Read = [{ node->getDepth() }]; diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index f1d88a9523838..492863ddfc4a1 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -7712,18 +7712,6 @@ AST_MATCHER_P(DecayedType, hasDecayedType, internal::Matcher, /// \endcode extern const AstTypeMatcher dependentNameType; -/// Matches a dependent template specialization type -/// -/// Example matches A::template B -/// \code -/// template struct A; -/// template struct declToImport { -/// typename A::template B a; -/// }; -/// \endcode -extern const AstTypeMatcher - dependentTemplateSpecializationType; - /// Matches declarations whose declaration context, interpreted as a /// Decl, matches \c InnerMatcher. /// diff --git a/clang/include/clang/Basic/TypeNodes.td b/clang/include/clang/Basic/TypeNodes.td index fb6862b90987f..db43a8529f02b 100644 --- a/clang/include/clang/Basic/TypeNodes.td +++ b/clang/include/clang/Basic/TypeNodes.td @@ -5,10 +5,11 @@ class TypeNode : ASTNode { bit Abstract = abstract; } -/// A type node that is only used to represent dependent types in C++. For -/// example, DependentTemplateSpecializationType is used to represent types -/// where the base template-id is dependent (such as `T::foo`). Code -/// that only works with non-dependent types can ignore these type nodes. +/// A type node that is only used to represent dependent types in C++. +/// For example, DependentSizedArrayType is used to represent types where the +/// size expression is dependent (such as `T[V]`, where V is a constant template +/// parameter). 
Code that only works with non-dependent types can ignore these +/// type nodes. class AlwaysDependent {} /// A type node that is never used to represent a canonical type, which is to @@ -96,7 +97,6 @@ def DeducedType : TypeNode; def AutoType : TypeNode; def DeducedTemplateSpecializationType : TypeNode; def DependentNameType : TypeNode, AlwaysDependent; -def DependentTemplateSpecializationType : TypeNode, AlwaysDependent; def PackExpansionType : TypeNode, AlwaysDependent; def PackIndexingType : TypeNode, NeverCanonicalUnlessDependent; def ObjCTypeParamType : TypeNode, NeverCanonical; diff --git a/clang/include/clang/Sema/HeuristicResolver.h b/clang/include/clang/Sema/HeuristicResolver.h index 71588bee92d16..9a220ba147ecb 100644 --- a/clang/include/clang/Sema/HeuristicResolver.h +++ b/clang/include/clang/Sema/HeuristicResolver.h @@ -62,7 +62,7 @@ class HeuristicResolver { std::vector resolveDependentNameType(const DependentNameType *DNT) const; std::vector resolveTemplateSpecializationType( - const DependentTemplateSpecializationType *DTST) const; + const TemplateSpecializationType *TST) const; // Try to heuristically resolve a dependent nested name specifier // to the type it likely denotes. Note that *dependent* name specifiers always diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a7600ab88febe..7e00085685b21 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11399,10 +11399,6 @@ class Sema final : public SemaBase { SourceLocation NameLoc, IdentifierInfo *&II); - bool resolveAssumedTemplateNameAsType(Scope *S, TemplateName &Name, - SourceLocation NameLoc, - bool Diagnose = true); - /// Determine whether a particular identifier might be the name in a C++1z /// deduction-guide declaration. 
bool isDeductionGuideName(Scope *S, const IdentifierInfo &Name, @@ -11643,7 +11639,8 @@ class Sema final : public SemaBase { QualType CheckTemplateIdType(ElaboratedTypeKeyword Keyword, TemplateName Template, SourceLocation TemplateLoc, - TemplateArgumentListInfo &TemplateArgs); + TemplateArgumentListInfo &TemplateArgs, + Scope *Scope, bool ForNestedNameSpecifier); TypeResult ActOnTemplateIdType(Scope *S, ElaboratedTypeKeyword ElaboratedKeyword, diff --git a/clang/include/clang/Serialization/TypeBitCodes.def b/clang/include/clang/Serialization/TypeBitCodes.def index bea15254922c1..d6c484563409c 100644 --- a/clang/include/clang/Serialization/TypeBitCodes.def +++ b/clang/include/clang/Serialization/TypeBitCodes.def @@ -39,7 +39,6 @@ TYPE_BIT_CODE(ObjCObject, OBJC_OBJECT, 28) TYPE_BIT_CODE(TemplateTypeParm, TEMPLATE_TYPE_PARM, 29) TYPE_BIT_CODE(TemplateSpecialization, TEMPLATE_SPECIALIZATION, 30) TYPE_BIT_CODE(DependentName, DEPENDENT_NAME, 31) -TYPE_BIT_CODE(DependentTemplateSpecialization, DEPENDENT_TEMPLATE_SPECIALIZATION, 32) TYPE_BIT_CODE(DependentSizedArray, DEPENDENT_SIZED_ARRAY, 33) TYPE_BIT_CODE(Paren, PAREN, 34) TYPE_BIT_CODE(PackExpansion, PACK_EXPANSION, 35) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index ed4c6b0e38be3..5240054c2f36b 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -4286,7 +4286,6 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::DependentName: case Type::InjectedClassName: case Type::TemplateSpecialization: - case Type::DependentTemplateSpecialization: case Type::TemplateTypeParm: case Type::SubstTemplateTypeParmPack: case Type::SubstBuiltinTemplatePack: @@ -5932,6 +5931,30 @@ QualType ASTContext::getTemplateTypeParmType(unsigned Depth, unsigned Index, return QualType(TypeParm, 0); } +static ElaboratedTypeKeyword +getCanonicalElaboratedTypeKeyword(ElaboratedTypeKeyword Keyword) { + switch (Keyword) { + // These are just themselves. 
+ case ElaboratedTypeKeyword::None: + case ElaboratedTypeKeyword::Struct: + case ElaboratedTypeKeyword::Union: + case ElaboratedTypeKeyword::Enum: + case ElaboratedTypeKeyword::Interface: + return Keyword; + + // These are equivalent. + case ElaboratedTypeKeyword::Typename: + return ElaboratedTypeKeyword::None; + + // These are functionally equivalent, so relying on their equivalence is + // IFNDR. By making them equivalent, we disallow overloading, which at least + // can produce a diagnostic. + case ElaboratedTypeKeyword::Class: + return ElaboratedTypeKeyword::Struct; + } + llvm_unreachable("unexpected keyword kind"); +} + TypeSourceInfo *ASTContext::getTemplateSpecializationTypeInfo( ElaboratedTypeKeyword Keyword, SourceLocation ElaboratedKeywordLoc, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKeywordLoc, @@ -5970,17 +5993,20 @@ hasAnyPackExpansions(ArrayRef Args) { } QualType ASTContext::getCanonicalTemplateSpecializationType( - TemplateName Template, ArrayRef Args) const { + ElaboratedTypeKeyword Keyword, TemplateName Template, + ArrayRef Args) const { assert(Template == getCanonicalTemplateName(Template, /*IgnoreDeduced=*/true)); - assert(!Args.empty()); + assert((Keyword == ElaboratedTypeKeyword::None || + Template.getAsDependentTemplateName())); #ifndef NDEBUG for (const auto &Arg : Args) assert(Arg.structurallyEquals(getCanonicalTemplateArgument(Arg))); #endif llvm::FoldingSetNodeID ID; - TemplateSpecializationType::Profile(ID, Template, Args, QualType(), *this); + TemplateSpecializationType::Profile(ID, Keyword, Template, Args, QualType(), + *this); void *InsertPos = nullptr; if (auto *T = TemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos)) return QualType(T, 0); @@ -5988,9 +6014,9 @@ QualType ASTContext::getCanonicalTemplateSpecializationType( void *Mem = Allocate(sizeof(TemplateSpecializationType) + sizeof(TemplateArgument) * Args.size(), alignof(TemplateSpecializationType)); - auto *Spec = new (Mem) - 
TemplateSpecializationType(ElaboratedTypeKeyword::None, Template, - /*IsAlias=*/false, Args, QualType()); + auto *Spec = + new (Mem) TemplateSpecializationType(Keyword, Template, + /*IsAlias=*/false, Args, QualType()); assert(Spec->isDependentType() && "canonical template specialization must be dependent"); Types.push_back(Spec); @@ -6002,16 +6028,16 @@ QualType ASTContext::getTemplateSpecializationType( ElaboratedTypeKeyword Keyword, TemplateName Template, ArrayRef SpecifiedArgs, ArrayRef CanonicalArgs, QualType Underlying) const { - assert(!Template.getUnderlying().getAsDependentTemplateName() && - "No dependent template names here!"); - const auto *TD = Template.getAsTemplateDecl(/*IgnoreDeduced=*/true); bool IsTypeAlias = TD && TD->isTypeAlias(); if (Underlying.isNull()) { TemplateName CanonTemplate = getCanonicalTemplateName(Template, /*IgnoreDeduced=*/true); - bool NonCanonical = - Template != CanonTemplate || Keyword != ElaboratedTypeKeyword::None; + ElaboratedTypeKeyword CanonKeyword = + CanonTemplate.getAsDependentTemplateName() + ? getCanonicalElaboratedTypeKeyword(Keyword) + : ElaboratedTypeKeyword::None; + bool NonCanonical = Template != CanonTemplate || Keyword != CanonKeyword; SmallVector CanonArgsVec; if (CanonicalArgs.empty()) { CanonArgsVec = SmallVector(SpecifiedArgs); @@ -6033,8 +6059,8 @@ QualType ASTContext::getTemplateSpecializationType( "Caller must compute aliased type"); IsTypeAlias = false; - Underlying = - getCanonicalTemplateSpecializationType(CanonTemplate, CanonicalArgs); + Underlying = getCanonicalTemplateSpecializationType( + CanonKeyword, CanonTemplate, CanonicalArgs); if (!NonCanonical) return Underlying; } @@ -6085,30 +6111,6 @@ ASTContext::getMacroQualifiedType(QualType UnderlyingTy, return QualType(newType, 0); } -static ElaboratedTypeKeyword -getCanonicalElaboratedTypeKeyword(ElaboratedTypeKeyword Keyword) { - switch (Keyword) { - // These are just themselves. 
- case ElaboratedTypeKeyword::None: - case ElaboratedTypeKeyword::Struct: - case ElaboratedTypeKeyword::Union: - case ElaboratedTypeKeyword::Enum: - case ElaboratedTypeKeyword::Interface: - return Keyword; - - // These are equivalent. - case ElaboratedTypeKeyword::Typename: - return ElaboratedTypeKeyword::None; - - // These are functionally equivalent, so relying on their equivalence is - // IFNDR. By making them equivalent, we disallow overloading, which at least - // can produce a diagnostic. - case ElaboratedTypeKeyword::Class: - return ElaboratedTypeKeyword::Struct; - } - llvm_unreachable("unexpected keyword kind"); -} - QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword, NestedNameSpecifier NNS, const IdentifierInfo *Name) const { @@ -6140,68 +6142,6 @@ QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword, return QualType(T, 0); } -QualType ASTContext::getDependentTemplateSpecializationType( - ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name, - ArrayRef Args) const { - // TODO: avoid this copy - SmallVector ArgCopy; - for (unsigned I = 0, E = Args.size(); I != E; ++I) - ArgCopy.push_back(Args[I].getArgument()); - return getDependentTemplateSpecializationType(Keyword, Name, ArgCopy); -} - -QualType ASTContext::getDependentTemplateSpecializationType( - ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name, - ArrayRef Args, bool IsCanonical) const { - llvm::FoldingSetNodeID ID; - DependentTemplateSpecializationType::Profile(ID, *this, Keyword, Name, Args); - - if (auto const T_iter = DependentTemplateSpecializationTypes.find(ID); - T_iter != DependentTemplateSpecializationTypes.end()) - return QualType(T_iter->getSecond(), 0); - - NestedNameSpecifier NNS = Name.getQualifier(); - - QualType Canon; - if (!IsCanonical) { - ElaboratedTypeKeyword CanonKeyword = - getCanonicalElaboratedTypeKeyword(Keyword); - NestedNameSpecifier CanonNNS = NNS.getCanonical(); - bool AnyNonCanonArgs = false; - auto 
CanonArgs = - ::getCanonicalTemplateArguments(*this, Args, AnyNonCanonArgs); - - if (CanonKeyword != Keyword || AnyNonCanonArgs || CanonNNS != NNS || - !Name.hasTemplateKeyword()) { - Canon = getDependentTemplateSpecializationType( - CanonKeyword, {CanonNNS, Name.getName(), /*HasTemplateKeyword=*/true}, - CanonArgs, - /*IsCanonical=*/true); - } - } else { - assert(Keyword == getCanonicalElaboratedTypeKeyword(Keyword)); - assert(Name.hasTemplateKeyword()); - assert(NNS.isCanonical()); -#ifndef NDEBUG - for (const auto &Arg : Args) - assert(Arg.structurallyEquals(getCanonicalTemplateArgument(Arg))); -#endif - } - void *Mem = Allocate((sizeof(DependentTemplateSpecializationType) + - sizeof(TemplateArgument) * Args.size()), - alignof(DependentTemplateSpecializationType)); - auto *T = - new (Mem) DependentTemplateSpecializationType(Keyword, Name, Args, Canon); -#ifndef NDEBUG - llvm::FoldingSetNodeID InsertedID; - T->Profile(InsertedID, *this); - assert(InsertedID == ID && "ID does not match"); -#endif - Types.push_back(T); - DependentTemplateSpecializationTypes.try_emplace(ID, T); - return QualType(T, 0); -} - TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) const { TemplateArgument Arg; if (const auto *TTP = dyn_cast(Param)) { @@ -14327,21 +14267,6 @@ static QualType getCommonNonSugarTypeNode(const ASTContext &Ctx, const Type *X, getCommonTypeKeyword(NX, NY, /*IsSame=*/true), getCommonQualifier(Ctx, NX, NY, /*IsSame=*/true), NX->getIdentifier()); } - case Type::DependentTemplateSpecialization: { - const auto *TX = cast(X), - *TY = cast(Y); - auto As = getCommonTemplateArguments(Ctx, TX->template_arguments(), - TY->template_arguments()); - const DependentTemplateStorage &SX = TX->getDependentTemplateName(), - &SY = TY->getDependentTemplateName(); - assert(SX.getName() == SY.getName()); - DependentTemplateStorage Name( - getCommonNNS(Ctx, SX.getQualifier(), SY.getQualifier(), - /*IsSame=*/true), - SX.getName(), SX.hasTemplateKeyword() || 
SY.hasTemplateKeyword()); - return Ctx.getDependentTemplateSpecializationType( - getCommonTypeKeyword(TX, TY, /*IsSame=*/true), Name, As); - } case Type::UnaryTransform: { const auto *TX = cast(X), *TY = cast(Y); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index db14272ae5db8..1c8fd83feb7f8 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1890,25 +1890,6 @@ ASTNodeImporter::VisitPackExpansionType(const PackExpansionType *T) { /*ExpactPack=*/false); } -ExpectedType ASTNodeImporter::VisitDependentTemplateSpecializationType( - const DependentTemplateSpecializationType *T) { - const DependentTemplateStorage &DTN = T->getDependentTemplateName(); - auto QualifierOrErr = import(DTN.getQualifier()); - if (!QualifierOrErr) - return QualifierOrErr.takeError(); - - SmallVector ToPack; - ToPack.reserve(T->template_arguments().size()); - if (Error Err = ImportTemplateArguments(T->template_arguments(), ToPack)) - return std::move(Err); - - return Importer.getToContext().getDependentTemplateSpecializationType( - T->getKeyword(), - {*QualifierOrErr, Importer.Import(DTN.getName()), - DTN.hasTemplateKeyword()}, - ToPack); -} - ExpectedType ASTNodeImporter::VisitDependentNameType(const DependentNameType *T) { auto ToQualifierOrErr = import(T->getQualifier()); diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 1292c30d47589..155734679b2da 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1384,20 +1384,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, break; } - case Type::DependentTemplateSpecialization: { - const auto *Spec1 = cast(T1); - const auto *Spec2 = cast(T2); - if (Spec1->getKeyword() != Spec2->getKeyword()) - return false; - if (!IsStructurallyEquivalent(Context, Spec1->getDependentTemplateName(), - Spec2->getDependentTemplateName())) - return false; - if 
(!IsStructurallyEquivalent(Context, Spec1->template_arguments(), - Spec2->template_arguments())) - return false; - break; - } - case Type::PackExpansion: if (!IsStructurallyEquivalent(Context, cast(T1)->getPattern(), diff --git a/clang/lib/AST/ASTTypeTraits.cpp b/clang/lib/AST/ASTTypeTraits.cpp index d2f7fdbbad04d..84eb77730b1cb 100644 --- a/clang/lib/AST/ASTTypeTraits.cpp +++ b/clang/lib/AST/ASTTypeTraits.cpp @@ -249,10 +249,6 @@ SourceRange DynTypedNode::getSourceRange(bool IncludeQualifier) const { auto T = TL->castAs(); return SourceRange(T.getTemplateNameLoc(), T.getEndLoc()); } - case TypeLoc::DependentTemplateSpecialization: { - auto T = TL->castAs(); - return SourceRange(T.getTemplateNameLoc(), T.getEndLoc()); - } case TypeLoc::Enum: case TypeLoc::Record: case TypeLoc::InjectedClassName: diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index 3162857aac5d0..b6bb6117d42af 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -663,6 +663,7 @@ CanQualType ClassTemplateDecl::getCanonicalInjectedSpecializationType( Ctx.canonicalizeTemplateArguments(CanonicalArgs); CommonPtr->CanonInjectedTST = CanQualType::CreateUnsafe(Ctx.getCanonicalTemplateSpecializationType( + ElaboratedTypeKeyword::None, TemplateName(const_cast(getCanonicalDecl())), CanonicalArgs)); } @@ -1209,6 +1210,7 @@ ClassTemplatePartialSpecializationDecl::getCanonicalInjectedSpecializationType( if (CanonInjectedTST.isNull()) { CanonInjectedTST = CanQualType::CreateUnsafe(Ctx.getCanonicalTemplateSpecializationType( + ElaboratedTypeKeyword::None, TemplateName(getSpecializedTemplate()->getCanonicalDecl()), getTemplateArgs().asArray())); } diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 163cd43abd45a..2173aed5b45af 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -1311,19 +1311,6 @@ void CXXNameMangler::manglePrefix(QualType type) { mangleTemplateArgs(TST->getTemplateName(), 
TST->template_arguments()); addSubstitution(QualType(TST, 0)); } - } else if (const auto *DTST = - type->getAs()) { - if (!mangleSubstitution(QualType(DTST, 0))) { - TemplateName Template = getASTContext().getDependentTemplateName( - DTST->getDependentTemplateName()); - mangleTemplatePrefix(Template); - - // FIXME: GCC does not appear to mangle the template arguments when - // the template in question is a dependent template name. Should we - // emulate that badness? - mangleTemplateArgs(Template, DTST->template_arguments()); - addSubstitution(QualType(DTST, 0)); - } } else if (const auto *DNT = type->getAs()) { // Clang 14 and before did not consider this substitutable. bool Clang14Compat = isCompatibleWith(LangOptions::ClangABI::Ver14); @@ -2525,10 +2512,14 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, mangleSourceNameWithAbiTags(TD); break; } + case TemplateName::DependentTemplate: { + const DependentTemplateStorage *S = TN.getAsDependentTemplateName(); + mangleSourceName(S->getName().getIdentifier()); + break; + } case TemplateName::OverloadedTemplate: case TemplateName::AssumedTemplate: - case TemplateName::DependentTemplate: case TemplateName::DeducedTemplate: llvm_unreachable("invalid base for a template specialization type"); @@ -2574,17 +2565,6 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, mangleSourceName(cast(Ty)->getIdentifier()); break; - case Type::DependentTemplateSpecialization: { - const DependentTemplateSpecializationType *DTST = - cast(Ty); - TemplateName Template = getASTContext().getDependentTemplateName( - DTST->getDependentTemplateName()); - const DependentTemplateStorage &S = DTST->getDependentTemplateName(); - mangleSourceName(S.getName().getIdentifier()); - mangleTemplateArgs(Template, DTST->template_arguments()); - break; - } - case Type::Using: return mangleUnresolvedTypeOrSimpleId(cast(Ty)->desugar(), Prefix); @@ -4458,16 +4438,14 @@ void CXXNameMangler::mangleType(const 
TemplateSpecializationType *T) { if (TemplateDecl *TD = T->getTemplateName().getAsTemplateDecl()) { mangleTemplateName(TD, T->template_arguments()); } else { - if (mangleSubstitution(QualType(T, 0))) - return; - + Out << 'N'; mangleTemplatePrefix(T->getTemplateName()); // FIXME: GCC does not appear to mangle the template arguments when // the template in question is a dependent template name. Should we // emulate that badness? mangleTemplateArgs(T->getTemplateName(), T->template_arguments()); - addSubstitution(QualType(T, 0)); + Out << 'E'; } } @@ -4505,21 +4483,6 @@ void CXXNameMangler::mangleType(const DependentNameType *T) { Out << 'E'; } -void CXXNameMangler::mangleType(const DependentTemplateSpecializationType *T) { - // Dependently-scoped template types are nested if they have a prefix. - Out << 'N'; - - TemplateName Prefix = - getASTContext().getDependentTemplateName(T->getDependentTemplateName()); - mangleTemplatePrefix(Prefix); - - // FIXME: GCC does not appear to mangle the template arguments when - // the template in question is a dependent template name. Should we - // emulate that badness? - mangleTemplateArgs(Prefix, T->template_arguments()); - Out << 'E'; -} - void CXXNameMangler::mangleType(const TypeOfType *T) { // FIXME: this is pretty unsatisfactory, but there isn't an obvious // "extension with parameters" mangling. 
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index d96472e393f68..8cbc72b1db735 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3655,12 +3655,6 @@ void MicrosoftCXXNameMangler::mangleType(const DependentNameType *T, Qualifiers, Error(Range.getBegin(), "dependent name type") << Range; } -void MicrosoftCXXNameMangler::mangleType( - const DependentTemplateSpecializationType *T, Qualifiers, - SourceRange Range) { - Error(Range.getBegin(), "dependent template specialization type") << Range; -} - void MicrosoftCXXNameMangler::mangleType(const PackExpansionType *T, Qualifiers, SourceRange Range) { Error(Range.getBegin(), "pack expansion") << Range; diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp index fb95f58092c49..6842038b7eb57 100644 --- a/clang/lib/AST/ODRHash.cpp +++ b/clang/lib/AST/ODRHash.cpp @@ -1213,16 +1213,6 @@ class ODRTypeVisitor : public TypeVisitor { VisitTypeWithKeyword(T); } - void VisitDependentTemplateSpecializationType( - const DependentTemplateSpecializationType *T) { - Hash.AddDependentTemplateName(T->getDependentTemplateName()); - ID.AddInteger(T->template_arguments().size()); - for (const auto &TA : T->template_arguments()) { - Hash.AddTemplateArgument(TA); - } - VisitTypeWithKeyword(T); - } - void VisitUnaryTransformType(const UnaryTransformType *T) { AddQualType(T->getUnderlyingType()); AddQualType(T->getBaseType()); diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp index f2cb15dbc43dd..2b8044e4188cd 100644 --- a/clang/lib/AST/TemplateName.cpp +++ b/clang/lib/AST/TemplateName.cpp @@ -213,25 +213,25 @@ TemplateDecl *TemplateName::getAsTemplateDecl(bool IgnoreDeduced) const { dyn_cast_if_present(Name.Storage)); } -std::pair +std::pair TemplateName::getTemplateDeclAndDefaultArgs() const { + DefaultArguments DefArgs; for (TemplateName Name = *this; /**/; /**/) { - if (Name.getKind() == TemplateName::DeducedTemplate) { - 
DeducedTemplateStorage *DTS = Name.getAsDeducedTemplateName(); - TemplateDecl *TD = - DTS->getUnderlying().getAsTemplateDecl(/*IgnoreDeduced=*/true); - DefaultArguments DefArgs = DTS->getDefaultArguments(); - if (TD && DefArgs) + if (DeducedTemplateStorage *DTS = Name.getAsDeducedTemplateName()) { + assert(!DefArgs && "multiple default args?"); + DefArgs = DTS->getDefaultArguments(); + if (TemplateDecl *TD = DTS->getUnderlying().getAsTemplateDecl(); + TD && DefArgs) assert(DefArgs.StartPos + DefArgs.Args.size() <= TD->getTemplateParameters()->size()); - return {TD, DTS->getDefaultArguments()}; + Name = DTS->getUnderlying(); } if (std::optional UnderlyingOrNone = Name.desugar(/*IgnoreDeduced=*/false)) { Name = *UnderlyingOrNone; continue; } - return {cast_if_present(Name.Storage.dyn_cast()), {}}; + return {Name, DefArgs}; } } diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 86621795d81e6..9794314a98f81 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -1933,10 +1933,6 @@ NestedNameSpecifier Type::getPrefix() const { return cast(this) ->getTemplateName() .getQualifier(); - case Type::DependentTemplateSpecialization: - return cast(this) - ->getDependentTemplateName() - .getQualifier(); case Type::Enum: case Type::Record: case Type::InjectedClassName: @@ -3215,7 +3211,6 @@ bool Type::isSpecifierType() const { case SubstTemplateTypeParm: case TemplateSpecialization: case DependentName: - case DependentTemplateSpecialization: case ObjCInterface: case ObjCObject: return true; @@ -3333,42 +3328,12 @@ StringRef KeywordHelpers::getKeywordName(ElaboratedTypeKeyword Keyword) { llvm_unreachable("Unknown elaborated type keyword."); } -DependentTemplateSpecializationType::DependentTemplateSpecializationType( - ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name, - ArrayRef Args, QualType Canon) - : TypeWithKeyword(Keyword, DependentTemplateSpecialization, Canon, - - toTypeDependence(Name.getDependence())), - Name(Name) { - 
DependentTemplateSpecializationTypeBits.NumArgs = Args.size(); - auto *ArgBuffer = const_cast(template_arguments().data()); - for (const TemplateArgument &Arg : Args) { - addDependence(toTypeDependence(Arg.getDependence() & - TemplateArgumentDependence::UnexpandedPack)); - - new (ArgBuffer++) TemplateArgument(Arg); - } -} - -void DependentTemplateSpecializationType::Profile( - llvm::FoldingSetNodeID &ID, const ASTContext &Context, - ElaboratedTypeKeyword Keyword, const DependentTemplateStorage &Name, - ArrayRef Args) { - ID.AddInteger(llvm::to_underlying(Keyword)); - Name.Profile(ID); - for (const TemplateArgument &Arg : Args) - Arg.Profile(ID, Context); -} - bool Type::isElaboratedTypeSpecifier() const { ElaboratedTypeKeyword Keyword; if (const auto *TST = dyn_cast(this)) Keyword = TST->getKeyword(); else if (const auto *DepName = dyn_cast(this)) Keyword = DepName->getKeyword(); - else if (const auto *DepTST = - dyn_cast(this)) - Keyword = DepTST->getKeyword(); else if (const auto *T = dyn_cast(this)) Keyword = T->getKeyword(); else if (const auto *T = dyn_cast(this)) @@ -4641,17 +4606,6 @@ TemplateSpecializationType::TemplateSpecializationType( TemplateSpecializationTypeBits.NumArgs = Args.size(); TemplateSpecializationTypeBits.TypeAlias = IsAlias; - assert(!T.getAsDependentTemplateName() && - "Use DependentTemplateSpecializationType for dependent template-name"); - assert((T.getKind() == TemplateName::Template || - T.getKind() == TemplateName::SubstTemplateTemplateParm || - T.getKind() == TemplateName::SubstTemplateTemplateParmPack || - T.getKind() == TemplateName::UsingTemplate || - T.getKind() == TemplateName::QualifiedTemplate || - T.getKind() == TemplateName::DeducedTemplate || - T.getKind() == TemplateName::AssumedTemplate) && - "Unexpected template name for TemplateSpecializationType"); - auto *TemplateArgs = const_cast(template_arguments().data()); for (const TemplateArgument &Arg : Args) { @@ -4690,15 +4644,17 @@ bool 
clang::TemplateSpecializationType::isSugared() const { void TemplateSpecializationType::Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Ctx) { - Profile(ID, Template, template_arguments(), + Profile(ID, getKeyword(), Template, template_arguments(), isSugared() ? desugar() : QualType(), Ctx); } void TemplateSpecializationType::Profile(llvm::FoldingSetNodeID &ID, + ElaboratedTypeKeyword Keyword, TemplateName T, ArrayRef Args, QualType Underlying, const ASTContext &Context) { + ID.AddInteger(llvm::to_underlying(Keyword)); T.Profile(ID); Underlying.Profile(ID); @@ -5105,7 +5061,6 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::SubstTemplateTypeParmPack: case Type::SubstBuiltinTemplatePack: case Type::DependentName: - case Type::DependentTemplateSpecialization: case Type::Auto: return ResultIfUnknown; diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 3e9597fc4d471..55476e2175a1f 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -477,8 +477,6 @@ NestedNameSpecifierLoc TypeLoc::getPrefix() const { return castAs().getQualifierLoc(); case TypeLoc::TemplateSpecialization: return castAs().getQualifierLoc(); - case TypeLoc::DependentTemplateSpecialization: - return castAs().getQualifierLoc(); case TypeLoc::DeducedTemplateSpecialization: return castAs().getQualifierLoc(); case TypeLoc::Enum: @@ -505,13 +503,6 @@ SourceLocation TypeLoc::getNonPrefixBeginLoc() const { Loc = TL.getTemplateNameLoc(); return Loc; } - case TypeLoc::DependentTemplateSpecialization: { - auto TL = castAs(); - SourceLocation Loc = TL.getTemplateKeywordLoc(); - if (!Loc.isValid()) - Loc = TL.getTemplateNameLoc(); - return Loc; - } case TypeLoc::DeducedTemplateSpecialization: { auto TL = castAs(); SourceLocation Loc = TL.getTemplateKeywordLoc(); @@ -550,12 +541,6 @@ SourceLocation TypeLoc::getNonElaboratedBeginLoc() const { return QualifierLoc.getBeginLoc(); return T.getTemplateNameLoc(); } - case 
TypeLoc::DependentTemplateSpecialization: { - auto T = castAs(); - if (NestedNameSpecifierLoc QualifierLoc = T.getQualifierLoc()) - return QualifierLoc.getBeginLoc(); - return T.getTemplateNameLoc(); - } case TypeLoc::DeducedTemplateSpecialization: { auto T = castAs(); if (NestedNameSpecifierLoc QualifierLoc = T.getQualifierLoc()) @@ -690,20 +675,6 @@ void DependentNameTypeLoc::initializeLocal(ASTContext &Context, setNameLoc(Loc); } -void -DependentTemplateSpecializationTypeLoc::initializeLocal(ASTContext &Context, - SourceLocation Loc) { - initializeElaboratedKeyword(*this, Loc); - setQualifierLoc(initializeQualifier( - Context, getTypePtr()->getDependentTemplateName().getQualifier(), Loc)); - setTemplateKeywordLoc(Loc); - setTemplateNameLoc(Loc); - setLAngleLoc(Loc); - setRAngleLoc(Loc); - TemplateSpecializationTypeLoc::initializeArgLocs( - Context, getTypePtr()->template_arguments(), getArgInfos(), Loc); -} - void TemplateSpecializationTypeLoc::set(SourceLocation ElaboratedKeywordLoc, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKeywordLoc, @@ -949,8 +920,5 @@ AutoTypeLoc TypeLoc::getContainedAutoTypeLoc() const { SourceLocation TypeLoc::getTemplateKeywordLoc() const { if (const auto TSTL = getAsAdjusted()) return TSTL.getTemplateKeywordLoc(); - if (const auto DTSTL = - getAsAdjusted()) - return DTSTL.getTemplateKeywordLoc(); return SourceLocation(); } diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 54ca42d2035ad..cd59678d67f2f 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -237,7 +237,6 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::TemplateSpecialization: case Type::InjectedClassName: case Type::DependentName: - case Type::DependentTemplateSpecialization: case Type::ObjCObject: case Type::ObjCTypeParam: case Type::ObjCInterface: @@ -1836,22 +1835,6 @@ void TypePrinter::printDependentNameBefore(const DependentNameType *T, void 
TypePrinter::printDependentNameAfter(const DependentNameType *T, raw_ostream &OS) {} -void TypePrinter::printDependentTemplateSpecializationBefore( - const DependentTemplateSpecializationType *T, raw_ostream &OS) { - IncludeStrongLifetimeRAII Strong(Policy); - - OS << TypeWithKeyword::getKeywordName(T->getKeyword()); - if (T->getKeyword() != ElaboratedTypeKeyword::None) - OS << " "; - - T->getDependentTemplateName().print(OS, Policy); - printTemplateArgumentList(OS, T->template_arguments(), Policy); - spaceBeforePlaceHolder(OS); -} - -void TypePrinter::printDependentTemplateSpecializationAfter( - const DependentTemplateSpecializationType *T, raw_ostream &OS) {} - void TypePrinter::printPackExpansionBefore(const PackExpansionType *T, raw_ostream &OS) { printBefore(T->getPattern(), OS); diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index 653b3810cb68b..1f0e007dafc65 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -1109,8 +1109,6 @@ const AstTypeMatcher templateTypeParmType; const AstTypeMatcher injectedClassNameType; const AstTypeMatcher decayedType; const AstTypeMatcher dependentNameType; -const AstTypeMatcher - dependentTemplateSpecializationType; AST_TYPELOC_TRAVERSE_MATCHER_DEF(hasElementType, AST_POLYMORPHIC_SUPPORTED_TYPES(ArrayType, ComplexType)); diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp index 48a7b91969aef..01c03f309a77b 100644 --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp @@ -222,7 +222,6 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(declRefExpr); REGISTER_MATCHER(dependentNameType); REGISTER_MATCHER(dependentScopeDeclRefExpr); - REGISTER_MATCHER(dependentTemplateSpecializationType); REGISTER_MATCHER(declStmt); REGISTER_MATCHER(declaratorDecl); REGISTER_MATCHER(decltypeType); diff --git 
a/clang/lib/Sema/HeuristicResolver.cpp b/clang/lib/Sema/HeuristicResolver.cpp index 29840a430292e..a5d1f5dd389cb 100644 --- a/clang/lib/Sema/HeuristicResolver.cpp +++ b/clang/lib/Sema/HeuristicResolver.cpp @@ -41,8 +41,8 @@ class HeuristicResolverImpl { resolveUsingValueDecl(const UnresolvedUsingValueDecl *UUVD); std::vector resolveDependentNameType(const DependentNameType *DNT); - std::vector resolveTemplateSpecializationType( - const DependentTemplateSpecializationType *DTST); + std::vector + resolveTemplateSpecializationType(const TemplateSpecializationType *TST); QualType resolveNestedNameSpecifierToType(NestedNameSpecifier NNS); QualType getPointeeType(QualType T); std::vector @@ -373,8 +373,9 @@ HeuristicResolverImpl::resolveDependentNameType(const DependentNameType *DNT) { std::vector HeuristicResolverImpl::resolveTemplateSpecializationType( - const DependentTemplateSpecializationType *DTST) { - const DependentTemplateStorage &DTN = DTST->getDependentTemplateName(); + const TemplateSpecializationType *TST) { + const DependentTemplateStorage &DTN = + *TST->getTemplateName().getAsDependentTemplateName(); return resolveDependentMember( resolveNestedNameSpecifierToType(DTN.getQualifier()), DTN.getName().getIdentifier(), TemplateFilter); @@ -596,8 +597,8 @@ std::vector HeuristicResolver::resolveDependentNameType( } std::vector HeuristicResolver::resolveTemplateSpecializationType( - const DependentTemplateSpecializationType *DTST) const { - return HeuristicResolverImpl(Ctx).resolveTemplateSpecializationType(DTST); + const TemplateSpecializationType *TST) const { + return HeuristicResolverImpl(Ctx).resolveTemplateSpecializationType(TST); } QualType HeuristicResolver::resolveNestedNameSpecifierToType( NestedNameSpecifier NNS) const { diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 3eed6ad7fe6b3..8411a3da8322d 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -157,8 +157,8 @@ void 
Sema::inferGslPointerAttribute(TypedefNameDecl *TD) { if (auto *TST = dyn_cast(Canonical.getTypePtr())) { - RD = dyn_cast_or_null( - TST->getTemplateName().getAsTemplateDecl()->getTemplatedDecl()); + if (const auto *TD = TST->getTemplateName().getAsTemplateDecl()) + RD = dyn_cast_or_null(TD->getTemplatedDecl()); } } diff --git a/clang/lib/Sema/SemaCXXScopeSpec.cpp b/clang/lib/Sema/SemaCXXScopeSpec.cpp index 437c69aa1587d..e89243b9d767a 100644 --- a/clang/lib/Sema/SemaCXXScopeSpec.cpp +++ b/clang/lib/Sema/SemaCXXScopeSpec.cpp @@ -896,64 +896,15 @@ bool Sema::ActOnCXXNestedNameSpecifier(Scope *S, if (SS.isInvalid()) return true; - TemplateName Template = OpaqueTemplate.get(); - // Translate the parser's template argument list in our AST format. TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc); translateTemplateArguments(TemplateArgsIn, TemplateArgs); - DependentTemplateName *DTN = Template.getAsDependentTemplateName(); - if (DTN && DTN->getName().getIdentifier()) { - // Handle a dependent template specialization for which we cannot resolve - // the template name. - assert(DTN->getQualifier() == SS.getScopeRep()); - QualType T = Context.getDependentTemplateSpecializationType( - ElaboratedTypeKeyword::None, - {SS.getScopeRep(), DTN->getName().getIdentifier(), - TemplateKWLoc.isValid()}, - TemplateArgs.arguments()); - - // Create source-location information for this type. 
- TypeLocBuilder Builder; - DependentTemplateSpecializationTypeLoc SpecTL - = Builder.push(T); - SpecTL.setElaboratedKeywordLoc(SourceLocation()); - SpecTL.setQualifierLoc(SS.getWithLocInContext(Context)); - SpecTL.setTemplateKeywordLoc(TemplateKWLoc); - SpecTL.setTemplateNameLoc(TemplateNameLoc); - SpecTL.setLAngleLoc(LAngleLoc); - SpecTL.setRAngleLoc(RAngleLoc); - for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I) - SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo()); - - SS.clear(); - SS.Make(Context, Builder.getTypeLocInContext(Context, T), CCLoc); - return false; - } - - // If we assumed an undeclared identifier was a template name, try to - // typo-correct it now. - if (Template.getAsAssumedTemplateName() && - resolveAssumedTemplateNameAsType(S, Template, TemplateNameLoc)) - return true; - - TemplateDecl *TD = Template.getAsTemplateDecl(); - if (Template.getAsOverloadedTemplate() || DTN || - isa(TD) || isa(TD)) { - SourceRange R(TemplateNameLoc, RAngleLoc); - if (SS.getRange().isValid()) - R.setBegin(SS.getRange().getBegin()); - - Diag(CCLoc, diag::err_non_type_template_in_nested_name_specifier) - << isa_and_nonnull(TD) << Template << R; - NoteAllFoundTemplates(Template); - return true; - } - // We were able to resolve the template name to an actual template. // Build an appropriate nested-name-specifier. - QualType T = CheckTemplateIdType(ElaboratedTypeKeyword::None, Template, - TemplateNameLoc, TemplateArgs); + QualType T = CheckTemplateIdType( + ElaboratedTypeKeyword::None, OpaqueTemplate.get(), TemplateNameLoc, + TemplateArgs, /*Scope=*/S, /*ForNestedNameSpecifier=*/true); if (T.isNull()) return true; @@ -961,7 +912,7 @@ bool Sema::ActOnCXXNestedNameSpecifier(Scope *S, // nested name specifiers. 
if (!T->isDependentType() && !isa(T.getCanonicalType())) { Diag(TemplateNameLoc, diag::err_nested_name_spec_non_tag) << T; - NoteAllFoundTemplates(Template); + NoteAllFoundTemplates(OpaqueTemplate.get()); return true; } diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index cc03616e0dfe1..229e91ed04caa 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -90,7 +90,8 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD, // Build the template-id. QualType CoroTrait = S.CheckTemplateIdType( - ElaboratedTypeKeyword::None, TemplateName(CoroTraits), KwLoc, Args); + ElaboratedTypeKeyword::None, TemplateName(CoroTraits), KwLoc, Args, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); if (CoroTrait.isNull()) return QualType(); if (S.RequireCompleteType(KwLoc, CoroTrait, @@ -163,7 +164,8 @@ static QualType lookupCoroutineHandleType(Sema &S, QualType PromiseType, // Build the template-id. QualType CoroHandleType = S.CheckTemplateIdType( - ElaboratedTypeKeyword::None, TemplateName(CoroHandle), Loc, Args); + ElaboratedTypeKeyword::None, TemplateName(CoroHandle), Loc, Args, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); if (CoroHandleType.isNull()) return QualType(); if (S.RequireCompleteType(Loc, CoroHandleType, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 7c1459e320167..2b0ddb584c37e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6392,12 +6392,6 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC, NextTL = TL.castAs().getQualifierLoc().getAsTypeLoc(); break; - case TypeLoc::DependentTemplateSpecialization: { - auto TST = TL.castAs(); - TemplateKeywordLoc = TST.getTemplateKeywordLoc(); - NextTL = TST.getQualifierLoc().getAsTypeLoc(); - break; - } default: break; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 63ce87b9b0607..8008c7b160bed 100644 --- 
a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1138,8 +1138,9 @@ static QualType getStdTrait(Sema &S, SourceLocation Loc, StringRef Trait, } // Build the template-id. - QualType TraitTy = S.CheckTemplateIdType(ElaboratedTypeKeyword::None, - TemplateName(TraitTD), Loc, Args); + QualType TraitTy = S.CheckTemplateIdType( + ElaboratedTypeKeyword::None, TemplateName(TraitTD), Loc, Args, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); if (TraitTy.isNull()) return QualType(); @@ -12315,7 +12316,8 @@ static QualType BuildStdClassTemplate(Sema &S, ClassTemplateDecl *CTD, Args.addArgument(TemplateArgumentLoc(TemplateArgument(TypeParam), TSI)); return S.CheckTemplateIdType(ElaboratedTypeKeyword::None, TemplateName(CTD), - Loc, Args); + Loc, Args, /*Scope=*/nullptr, + /*ForNestedNameSpecifier=*/false); } QualType Sema::BuildStdInitializerList(QualType Element, SourceLocation Loc) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index bd62ac6234180..439444281c2d5 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -21360,8 +21360,9 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) { QualType TST; { SFINAETrap Trap(*this); - TST = CheckTemplateIdType(ElaboratedTypeKeyword::None, TN, - NameInfo.getBeginLoc(), TAL); + TST = CheckTemplateIdType( + ElaboratedTypeKeyword::None, TN, NameInfo.getBeginLoc(), TAL, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); } if (TST.isNull()) TST = Context.getTemplateSpecializationType( diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 54918c560b655..25728de1779ad 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -4575,6 +4575,13 @@ static void getNestedNameSpecifierIdentifiers( case Type::TemplateSpecialization: { TemplateName Name = cast(T)->getTemplateName(); + if (const DependentTemplateName *DTN = + Name.getAsDependentTemplateName()) { + getNestedNameSpecifierIdentifiers(DTN->getQualifier(), 
Identifiers); + if (const auto *II = DTN->getName().getIdentifier()) + Identifiers.push_back(II); + return; + } if (const QualifiedTemplateName *QTN = Name.getAsQualifiedTemplateName()) { getNestedNameSpecifierIdentifiers(QTN->getQualifier(), Identifiers); @@ -4584,15 +4591,6 @@ static void getNestedNameSpecifierIdentifiers( Identifiers.push_back(TD->getIdentifier()); return; } - case Type::DependentTemplateSpecialization: { - const DependentTemplateStorage &S = - cast(T) - ->getDependentTemplateName(); - getNestedNameSpecifierIdentifiers(S.getQualifier(), Identifiers); - // FIXME: Should this dig into the Name as well? - // Identifiers.push_back(S.getName().getIdentifier()); - return; - } case Type::SubstTemplateTypeParm: T = cast(T) ->getReplacementType() diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 58dae32569bcc..d6b25c2d83613 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2845,6 +2845,16 @@ TemplateParameterList *Sema::MatchTemplateParametersToScopeSpecifier( if (const TemplateSpecializationType *TST = T->getAs()) { + TemplateName Name = TST->getTemplateName(); + if (const auto *DTS = Name.getAsDependentTemplateName()) { + // Look one step prior in a dependent template specialization type. + if (NestedNameSpecifier NNS = DTS->getQualifier(); + NNS.getKind() == NestedNameSpecifier::Kind::Type) + T = QualType(NNS.getAsType(), 0); + else + T = QualType(); + continue; + } if (TemplateDecl *Template = TST->getTemplateName().getAsTemplateDecl()) { if (TypeDecl *Parent = dyn_cast(Template->getDeclContext())) T = Context.getTypeDeclType(Parent); @@ -2854,18 +2864,6 @@ TemplateParameterList *Sema::MatchTemplateParametersToScopeSpecifier( } } - // Look one step prior in a dependent template specialization type. 
- if (const DependentTemplateSpecializationType *DependentTST - = T->getAs()) { - if (NestedNameSpecifier NNS = - DependentTST->getDependentTemplateName().getQualifier(); - NNS.getKind() == NestedNameSpecifier::Kind::Type) - T = QualType(NNS.getAsType(), 0); - else - T = QualType(); - continue; - } - // Look one step prior in a dependent name type. if (const DependentNameType *DependentName = T->getAs()){ if (NestedNameSpecifier NNS = DependentName->getQualifier(); @@ -2985,16 +2983,16 @@ TemplateParameterList *Sema::MatchTemplateParametersToScopeSpecifier( continue; } - } else if (const TemplateSpecializationType *TST - = T->getAs()) { - if (TemplateDecl *Template = TST->getTemplateName().getAsTemplateDecl()) { + } else if (const auto *TST = T->getAs()) { + TemplateName Name = TST->getTemplateName(); + if (TemplateDecl *Template = Name.getAsTemplateDecl()) { ExpectedTemplateParams = Template->getTemplateParameters(); NeedNonemptyTemplateHeader = true; + } else if (Name.getAsDeducedTemplateName()) { + // FIXME: We actually could/should check the template arguments here + // against the corresponding template parameter list. + NeedNonemptyTemplateHeader = false; } - } else if (T->getAs()) { - // FIXME: We actually could/should check the template arguments here - // against the corresponding template parameter list. 
- NeedNonemptyTemplateHeader = false; } // C++ [temp.expl.spec]p16: @@ -3203,8 +3201,9 @@ static QualType builtinCommonTypeImpl(Sema &S, ElaboratedTypeKeyword Keyword, Sema::SFINAETrap SFINAE(S, /*ForValidityCheck=*/true); Sema::ContextRAII TUContext(S, S.Context.getTranslationUnitDecl()); - QualType BaseTemplateInst = - S.CheckTemplateIdType(Keyword, BaseTemplate, TemplateLoc, Args); + QualType BaseTemplateInst = S.CheckTemplateIdType( + Keyword, BaseTemplate, TemplateLoc, Args, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); if (SFINAE.hasErrorOccurred()) return QualType(); @@ -3422,7 +3421,9 @@ static QualType checkBuiltinTemplateIdType( // The first template argument will be reused as the template decl that // our synthetic template arguments will be applied to. return SemaRef.CheckTemplateIdType(Keyword, Converted[0].getAsTemplate(), - TemplateLoc, SyntheticTemplateArgs); + TemplateLoc, SyntheticTemplateArgs, + /*Scope=*/nullptr, + /*ForNestedNameSpecifier=*/false); } case BTK__type_pack_element: { @@ -3467,7 +3468,8 @@ static QualType checkBuiltinTemplateIdType( CT, TemplateArgs[1].getLocation()))); TemplateName HasTypeMember = Converted[1].getAsTemplate(); return SemaRef.CheckTemplateIdType(Keyword, HasTypeMember, TemplateLoc, - TAs); + TAs, /*Scope=*/nullptr, + /*ForNestedNameSpecifier=*/false); } QualType HasNoTypeMember = Converted[2].getAsType(); return HasNoTypeMember; @@ -3666,40 +3668,81 @@ Sema::findFailedBooleanCondition(Expr *Cond) { return { FailedCond, Description }; } +static TemplateName +resolveAssumedTemplateNameAsType(Sema &S, Scope *Scope, + const AssumedTemplateStorage *ATN, + SourceLocation NameLoc) { + // We assumed this undeclared identifier to be an (ADL-only) function + // template name, but it was used in a context where a type was required. + // Try to typo-correct it now. 
+ LookupResult R(S, ATN->getDeclName(), NameLoc, S.LookupOrdinaryName); + struct CandidateCallback : CorrectionCandidateCallback { + bool ValidateCandidate(const TypoCorrection &TC) override { + return TC.getCorrectionDecl() && + getAsTypeTemplateDecl(TC.getCorrectionDecl()); + } + std::unique_ptr clone() override { + return std::make_unique(*this); + } + } FilterCCC; + + TypoCorrection Corrected = + S.CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), Scope, + /*SS=*/nullptr, FilterCCC, CorrectTypoKind::ErrorRecovery); + if (Corrected && Corrected.getFoundDecl()) { + S.diagnoseTypo(Corrected, S.PDiag(diag::err_no_template_suggest) + << ATN->getDeclName()); + return S.Context.getQualifiedTemplateName( + /*Qualifier=*/std::nullopt, /*TemplateKeyword=*/false, + TemplateName(Corrected.getCorrectionDeclAs())); + } + + return TemplateName(); +} + QualType Sema::CheckTemplateIdType(ElaboratedTypeKeyword Keyword, TemplateName Name, SourceLocation TemplateLoc, - TemplateArgumentListInfo &TemplateArgs) { - // FIXME: 'getUnderlying' loses SubstTemplateTemplateParm nodes from alias - // template substitutions. - if (DependentTemplateName *DTN = - Name.getUnderlying().getAsDependentTemplateName(); - DTN && DTN->getName().getIdentifier()) - // When building a template-id where the template-name is dependent, - // assume the template is a type template. Either our assumption is - // correct, or the code is ill-formed and will be diagnosed when the - // dependent name is substituted. 
- return Context.getDependentTemplateSpecializationType( - ElaboratedTypeKeyword::None, *DTN, TemplateArgs.arguments()); - - if (Name.getAsAssumedTemplateName() && - resolveAssumedTemplateNameAsType(/*Scope=*/nullptr, Name, TemplateLoc)) - return QualType(); + TemplateArgumentListInfo &TemplateArgs, + Scope *Scope, bool ForNestedNameSpecifier) { + auto [UnderlyingName, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs(); - TemplateDecl *Template; - DefaultArguments DefaultArgs; - if (const SubstTemplateTemplateParmPackStorage *S = - Name.getAsSubstTemplateTemplateParmPack()) { - Template = S->getParameterPack(); - } else { - std::tie(Template, DefaultArgs) = Name.getTemplateDeclAndDefaultArgs(); - if (!Template || isa(Template) || - isa(Template) || isa(Template)) { - Diag(TemplateLoc, diag::err_template_id_not_a_type) << Name; - NoteAllFoundTemplates(Name); - return QualType(); + TemplateDecl *Template = UnderlyingName.getAsTemplateDecl(); + if (!Template) { + if (const auto *S = UnderlyingName.getAsSubstTemplateTemplateParmPack()) { + Template = S->getParameterPack(); + } else if (const auto *DTN = UnderlyingName.getAsDependentTemplateName()) { + if (DTN->getName().getIdentifier()) + // When building a template-id where the template-name is dependent, + // assume the template is a type template. Either our assumption is + // correct, or the code is ill-formed and will be diagnosed when the + // dependent name is substituted. 
+ return Context.getTemplateSpecializationType(Keyword, Name, + TemplateArgs.arguments(), + /*CanonicalArgs=*/{}); + } else if (const auto *ATN = UnderlyingName.getAsAssumedTemplateName()) { + if (TemplateName CorrectedName = ::resolveAssumedTemplateNameAsType( + *this, Scope, ATN, TemplateLoc); + CorrectedName.isNull()) { + Diag(TemplateLoc, diag::err_no_template) << ATN->getDeclName(); + return QualType(); + } else { + Name = CorrectedName; + Template = Name.getAsTemplateDecl(); + } } } + if (!Template || + isa(Template)) { + SourceRange R(TemplateLoc, TemplateArgs.getRAngleLoc()); + if (ForNestedNameSpecifier) + Diag(TemplateLoc, diag::err_non_type_template_in_nested_name_specifier) + << isa_and_nonnull(Template) << Name << R; + else + Diag(TemplateLoc, diag::err_template_id_not_a_type) << Name << R; + NoteAllFoundTemplates(Name); + return QualType(); + } // Check that the template argument list is well-formed for this // template. @@ -3810,6 +3853,7 @@ QualType Sema::CheckTemplateIdType(ElaboratedTypeKeyword Keyword, // // template struct A; CanonType = Context.getCanonicalTemplateSpecializationType( + ElaboratedTypeKeyword::None, Context.getCanonicalTemplateName(Name, /*IgnoreDeduced=*/true), CTAI.CanonicalConverted); assert(CanonType->isCanonicalUnqualified()); @@ -3908,55 +3952,19 @@ void Sema::ActOnUndeclaredTypeTemplateName(Scope *S, TemplateTy &ParsedName, IdentifierInfo *&II) { assert(TNK == TNK_Undeclared_template && "not an undeclared template name"); - TemplateName Name = ParsedName.get(); - auto *ATN = Name.getAsAssumedTemplateName(); + auto *ATN = ParsedName.get().getAsAssumedTemplateName(); assert(ATN && "not an assumed template name"); II = ATN->getDeclName().getAsIdentifierInfo(); - if (!resolveAssumedTemplateNameAsType(S, Name, NameLoc, /*Diagnose*/false)) { + if (TemplateName Name = + ::resolveAssumedTemplateNameAsType(*this, S, ATN, NameLoc); + !Name.isNull()) { // Resolved to a type template name. 
ParsedName = TemplateTy::make(Name); TNK = TNK_Type_template; } } -bool Sema::resolveAssumedTemplateNameAsType(Scope *S, TemplateName &Name, - SourceLocation NameLoc, - bool Diagnose) { - // We assumed this undeclared identifier to be an (ADL-only) function - // template name, but it was used in a context where a type was required. - // Try to typo-correct it now. - AssumedTemplateStorage *ATN = Name.getAsAssumedTemplateName(); - assert(ATN && "not an assumed template name"); - - LookupResult R(*this, ATN->getDeclName(), NameLoc, LookupOrdinaryName); - struct CandidateCallback : CorrectionCandidateCallback { - bool ValidateCandidate(const TypoCorrection &TC) override { - return TC.getCorrectionDecl() && - getAsTypeTemplateDecl(TC.getCorrectionDecl()); - } - std::unique_ptr clone() override { - return std::make_unique(*this); - } - } FilterCCC; - - TypoCorrection Corrected = - CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, nullptr, - FilterCCC, CorrectTypoKind::ErrorRecovery); - if (Corrected && Corrected.getFoundDecl()) { - diagnoseTypo(Corrected, PDiag(diag::err_no_template_suggest) - << ATN->getDeclName()); - Name = Context.getQualifiedTemplateName( - /*Qualifier=*/std::nullopt, /*TemplateKeyword=*/false, - TemplateName(Corrected.getCorrectionDeclAs())); - return false; - } - - if (Diagnose) - Diag(R.getNameLoc(), diag::err_no_template) << R.getLookupName(); - return true; -} - TypeResult Sema::ActOnTemplateIdType( Scope *S, ElaboratedTypeKeyword ElaboratedKeyword, SourceLocation ElaboratedKeywordLoc, CXXScopeSpec &SS, @@ -4013,36 +4021,13 @@ TypeResult Sema::ActOnTemplateIdType( } } - TemplateName Template = TemplateD.get(); - if (Template.getAsAssumedTemplateName() && - resolveAssumedTemplateNameAsType(S, Template, TemplateIILoc)) - return true; - // Translate the parser's template argument list in our AST format. 
TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc); translateTemplateArguments(TemplateArgsIn, TemplateArgs); - if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) { - assert(SS.getScopeRep() == DTN->getQualifier()); - QualType T = Context.getDependentTemplateSpecializationType( - ElaboratedKeyword, *DTN, TemplateArgs.arguments()); - // Build type-source information. - TypeLocBuilder TLB; - DependentTemplateSpecializationTypeLoc SpecTL - = TLB.push(T); - SpecTL.setElaboratedKeywordLoc(ElaboratedKeywordLoc); - SpecTL.setQualifierLoc(SS.getWithLocInContext(Context)); - SpecTL.setTemplateKeywordLoc(TemplateKWLoc); - SpecTL.setTemplateNameLoc(TemplateIILoc); - SpecTL.setLAngleLoc(LAngleLoc); - SpecTL.setRAngleLoc(RAngleLoc); - for (unsigned I = 0, N = SpecTL.getNumArgs(); I != N; ++I) - SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo()); - return CreateParsedType(T, TLB.getTypeSourceInfo(Context, T)); - } - - QualType SpecTy = CheckTemplateIdType(ElaboratedKeyword, Template, - TemplateIILoc, TemplateArgs); + QualType SpecTy = CheckTemplateIdType( + ElaboratedKeyword, TemplateD.get(), TemplateIILoc, TemplateArgs, + /*Scope=*/S, /*ForNestedNameSpecifier=*/false); if (SpecTy.isNull()) return true; @@ -4067,8 +4052,6 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK, if (SS.isInvalid()) return TypeResult(true); - TemplateName Template = TemplateD.get(); - // Translate the parser's template argument list in our AST format. 
TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc); translateTemplateArguments(TemplateArgsIn, TemplateArgs); @@ -4078,28 +4061,9 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK, ElaboratedTypeKeyword Keyword = TypeWithKeyword::getKeywordForTagTypeKind(TagKind); - if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) { - assert(SS.getScopeRep() == DTN->getQualifier()); - QualType T = Context.getDependentTemplateSpecializationType( - Keyword, *DTN, TemplateArgs.arguments()); - - // Build type-source information. - TypeLocBuilder TLB; - DependentTemplateSpecializationTypeLoc SpecTL - = TLB.push(T); - SpecTL.setElaboratedKeywordLoc(TagLoc); - SpecTL.setQualifierLoc(SS.getWithLocInContext(Context)); - SpecTL.setTemplateKeywordLoc(TemplateKWLoc); - SpecTL.setTemplateNameLoc(TemplateLoc); - SpecTL.setLAngleLoc(LAngleLoc); - SpecTL.setRAngleLoc(RAngleLoc); - for (unsigned I = 0, N = SpecTL.getNumArgs(); I != N; ++I) - SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo()); - return CreateParsedType(T, TLB.getTypeSourceInfo(Context, T)); - } - QualType Result = - CheckTemplateIdType(Keyword, Template, TemplateLoc, TemplateArgs); + CheckTemplateIdType(Keyword, TemplateD.get(), TemplateLoc, TemplateArgs, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); if (Result.isNull()) return TypeResult(true); @@ -6389,11 +6353,6 @@ bool UnnamedLocalNoLinkageFinder::VisitDependentNameType( return VisitNestedNameSpecifier(T->getQualifier()); } -bool UnnamedLocalNoLinkageFinder::VisitDependentTemplateSpecializationType( - const DependentTemplateSpecializationType* T) { - return VisitNestedNameSpecifier(T->getDependentTemplateName().getQualifier()); -} - bool UnnamedLocalNoLinkageFinder::VisitPackExpansionType( const PackExpansionType* T) { return Visit(T->getPattern()); @@ -7832,8 +7791,10 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, bool PartialOrdering, bool *StrictPackMatch) { TemplateName Name = 
Arg.getArgument().getAsTemplateOrTemplatePattern(); - auto [Template, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs(); + auto [UnderlyingName, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs(); + TemplateDecl *Template = UnderlyingName.getAsTemplateDecl(); if (!Template) { + // FIXME: Handle AssumedTemplateNames // Any dependent template name is fine. assert(Name.isDependent() && "Non-dependent template isn't a declaration?"); return false; @@ -8949,6 +8910,7 @@ DeclResult Sema::ActOnClassTemplateSpecialization( } else { CanQualType CanonType = CanQualType::CreateUnsafe( Context.getCanonicalTemplateSpecializationType( + ElaboratedTypeKeyword::None, TemplateName(ClassTemplate->getCanonicalDecl()), CTAI.CanonicalConverted)); if (Context.hasSameType( @@ -11128,43 +11090,11 @@ Sema::ActOnTypenameType(Scope *S, SourceLocation TypenameLoc, TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc); translateTemplateArguments(TemplateArgsIn, TemplateArgs); - auto Keyword = TypenameLoc.isValid() ? ElaboratedTypeKeyword::Typename - : ElaboratedTypeKeyword::None; - - TemplateName Template = TemplateIn.get(); - if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) { - // Construct a dependent template specialization type. - assert(DTN && "dependent template has non-dependent name?"); - assert(DTN->getQualifier() == SS.getScopeRep()); - - if (!DTN->getName().getIdentifier()) { - Diag(TemplateIILoc, diag::err_template_id_not_a_type) << Template; - NoteAllFoundTemplates(Template); - return true; - } - - QualType T = Context.getDependentTemplateSpecializationType( - Keyword, *DTN, TemplateArgs.arguments()); - - // Create source-location information for this type. 
- TypeLocBuilder Builder; - DependentTemplateSpecializationTypeLoc SpecTL - = Builder.push(T); - SpecTL.setElaboratedKeywordLoc(TypenameLoc); - SpecTL.setQualifierLoc(SS.getWithLocInContext(Context)); - SpecTL.setTemplateKeywordLoc(TemplateKWLoc); - SpecTL.setTemplateNameLoc(TemplateIILoc); - SpecTL.setLAngleLoc(LAngleLoc); - SpecTL.setRAngleLoc(RAngleLoc); - for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I) - SpecTL.setArgLocInfo(I, TemplateArgs[I].getLocInfo()); - return CreateParsedType(T, Builder.getTypeSourceInfo(Context, T)); - } - - QualType T = CheckTemplateIdType(TypenameLoc.isValid() - ? ElaboratedTypeKeyword::Typename - : ElaboratedTypeKeyword::None, - Template, TemplateIILoc, TemplateArgs); + QualType T = CheckTemplateIdType( + TypenameLoc.isValid() ? ElaboratedTypeKeyword::Typename + : ElaboratedTypeKeyword::None, + TemplateIn.get(), TemplateIILoc, TemplateArgs, + /*Scope=*/S, /*ForNestedNameSpecifier=*/false); if (T.isNull()) return true; diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index cce40c0c91f95..64be2aab259f5 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -696,6 +696,11 @@ DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams, if (isa(P.getCanonicalType())) { const TemplateSpecializationType *TP = ::getLastTemplateSpecType(P); TNP = TP->getTemplateName(); + + // No deduction for specializations of dependent template names. + if (TNP.getAsDependentTemplateName()) + return TemplateDeductionResult::Success; + // FIXME: To preserve sugar, the TST needs to carry sugared resolved // arguments. 
PResolved = @@ -2540,7 +2545,6 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch( case Type::Decltype: case Type::UnaryTransform: case Type::DeducedTemplateSpecialization: - case Type::DependentTemplateSpecialization: case Type::PackExpansion: case Type::Pipe: case Type::ArrayParameter: @@ -6495,9 +6499,9 @@ Sema::getMoreSpecializedPartialSpecialization( " the same template."); TemplateName Name(PS1->getSpecializedTemplate()->getCanonicalDecl()); QualType PT1 = Context.getCanonicalTemplateSpecializationType( - Name, PS1->getTemplateArgs().asArray()); + ElaboratedTypeKeyword::None, Name, PS1->getTemplateArgs().asArray()); QualType PT2 = Context.getCanonicalTemplateSpecializationType( - Name, PS2->getTemplateArgs().asArray()); + ElaboratedTypeKeyword::None, Name, PS2->getTemplateArgs().asArray()); TemplateDeductionInfo Info(Loc); return getMoreSpecialized(*this, PT1, PT2, PS1, PS2, Info); @@ -6512,10 +6516,10 @@ bool Sema::isMoreSpecializedThanPrimary( Primary->getInjectedTemplateArgs(Context)); Context.canonicalizeTemplateArguments(PrimaryCanonArgs); - QualType PrimaryT = - Context.getCanonicalTemplateSpecializationType(Name, PrimaryCanonArgs); + QualType PrimaryT = Context.getCanonicalTemplateSpecializationType( + ElaboratedTypeKeyword::None, Name, PrimaryCanonArgs); QualType PartialT = Context.getCanonicalTemplateSpecializationType( - Name, Spec->getTemplateArgs().asArray()); + ElaboratedTypeKeyword::None, Name, Spec->getTemplateArgs().asArray()); VarTemplatePartialSpecializationDecl *MaybeSpec = getMoreSpecialized(*this, PartialT, PrimaryT, Spec, Primary, Info); @@ -6993,8 +6997,12 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, case Type::TemplateSpecialization: { const TemplateSpecializationType *Spec = cast(T); - MarkUsedTemplateParameters(Ctx, Spec->getTemplateName(), OnlyDeduced, - Depth, Used); + + TemplateName Name = Spec->getTemplateName(); + if (OnlyDeduced && Name.getAsDependentTemplateName()) + break; + + 
MarkUsedTemplateParameters(Ctx, Name, OnlyDeduced, Depth, Used); // C++0x [temp.deduct.type]p9: // If the template argument list of P contains a pack expansion that is @@ -7030,31 +7038,6 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, OnlyDeduced, Depth, Used); break; - case Type::DependentTemplateSpecialization: { - // C++14 [temp.deduct.type]p5: - // The non-deduced contexts are: - // -- The nested-name-specifier of a type that was specified using a - // qualified-id - // - // C++14 [temp.deduct.type]p6: - // When a type name is specified in a way that includes a non-deduced - // context, all of the types that comprise that type name are also - // non-deduced. - if (OnlyDeduced) - break; - - const DependentTemplateSpecializationType *Spec - = cast(T); - - MarkUsedTemplateParameters(Ctx, - Spec->getDependentTemplateName().getQualifier(), - OnlyDeduced, Depth, Used); - - for (const auto &Arg : Spec->template_arguments()) - MarkUsedTemplateParameters(Ctx, Arg, OnlyDeduced, Depth, Used); - break; - } - case Type::TypeOf: if (!OnlyDeduced) MarkUsedTemplateParameters(Ctx, cast(T)->getUnmodifiedType(), diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index b3cbd7f8c1efe..df1a100cab22c 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -6951,8 +6951,9 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D, Args.addArgument( getTrivialTemplateArgumentLoc(UnpackedArg, QualType(), Loc)); } - QualType T = CheckTemplateIdType(ElaboratedTypeKeyword::None, - TemplateName(TD), Loc, Args); + QualType T = CheckTemplateIdType( + ElaboratedTypeKeyword::None, TemplateName(TD), Loc, Args, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); // We may get a non-null type with errors, in which case // `getAsCXXRecordDecl` will return `nullptr`. 
For instance, this // happens when one of the template arguments is an invalid diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0f655d7f684a5..d723fb80f437e 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6036,15 +6036,6 @@ namespace { assert(TInfo); TL.copy(TInfo->getTypeLoc().castAs()); } - void VisitDependentTemplateSpecializationTypeLoc( - DependentTemplateSpecializationTypeLoc TL) { - assert(DS.getTypeSpecType() == TST_typename); - TypeSourceInfo *TInfo = nullptr; - Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); - assert(TInfo); - TL.copy( - TInfo->getTypeLoc().castAs()); - } void VisitAutoTypeLoc(AutoTypeLoc TL) { assert(DS.getTypeSpecType() == TST_auto || DS.getTypeSpecType() == TST_decltype_auto || diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 0587a7decbd8d..6136937210978 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -744,10 +744,11 @@ class TreeTransform { StmtResult TransformSEHHandler(Stmt *Handler); - QualType TransformDependentTemplateSpecializationType( - TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL, - QualType ObjectType, NamedDecl *UnqualLookup, - bool AllowInjectedClassName); + QualType TransformTemplateSpecializationType(TypeLocBuilder &TLB, + TemplateSpecializationTypeLoc TL, + QualType ObjectType, + NamedDecl *FirstQualifierInScope, + bool AllowInjectedClassName); QualType TransformTagType(TypeLocBuilder &TLB, TagTypeLoc TL); @@ -1163,24 +1164,6 @@ class TreeTransform { return SemaRef.BuildParenType(InnerType); } - /// Build a new typename type that refers to a template-id. - /// - /// By default, builds a new DependentNameType type from the - /// nested-name-specifier and the given type. Subclasses may override - /// this routine to provide different behavior. 
- QualType RebuildDependentTemplateSpecializationType( - ElaboratedTypeKeyword Keyword, SourceLocation TemplateKWLoc, - TemplateName Name, SourceLocation NameLoc, TemplateArgumentListInfo &Args, - bool AllowInjectedClassName) { - // If it's still dependent, make a dependent specialization. - if (const DependentTemplateStorage *S = Name.getAsDependentTemplateName()) - return SemaRef.Context.getDependentTemplateSpecializationType( - Keyword, *S, Args.arguments()); - - return getDerived().RebuildTemplateSpecializationType(Keyword, Name, - NameLoc, Args); - } - /// Build a new typename type that refers to an identifier. /// /// By default, performs semantic analysis when building the typename type @@ -5526,19 +5509,18 @@ QualType TreeTransform::RebuildQualifiedType(QualType T, template QualType TreeTransform::TransformTypeInObjectScope( TypeLocBuilder &TLB, TypeLoc TL, QualType ObjectType, - NamedDecl *UnqualLookup) { + NamedDecl *FirstQualifierInScope) { assert(!getDerived().AlreadyTransformed(TL.getType())); switch (TL.getTypeLocClass()) { - case TypeLoc::DependentTemplateSpecialization: - return getDerived().TransformDependentTemplateSpecializationType( - TLB, TL.castAs(), ObjectType, - UnqualLookup, /*AllowInjectedClassName=*/true); - case TypeLoc::DependentName: { + case TypeLoc::TemplateSpecialization: + return getDerived().TransformTemplateSpecializationType( + TLB, TL.castAs(), ObjectType, + FirstQualifierInScope, /*AllowInjectedClassName=*/true); + case TypeLoc::DependentName: return getDerived().TransformDependentNameType( TLB, TL.castAs(), /*DeducedTSTContext=*/false, - ObjectType, UnqualLookup); - } + ObjectType, FirstQualifierInScope); default: // Any dependent canonical type can appear here, through type alias // templates. 
@@ -7504,12 +7486,22 @@ QualType TreeTransform::TransformAutoType(TypeLocBuilder &TLB, template QualType TreeTransform::TransformTemplateSpecializationType( TypeLocBuilder &TLB, TemplateSpecializationTypeLoc TL) { + return getDerived().TransformTemplateSpecializationType( + TLB, TL, /*ObjectType=*/QualType(), /*FirstQualifierInScope=*/nullptr, + /*AllowInjectedClassName=*/false); +} + +template +QualType TreeTransform::TransformTemplateSpecializationType( + TypeLocBuilder &TLB, TemplateSpecializationTypeLoc TL, QualType ObjectType, + NamedDecl *FirstQualifierInScope, bool AllowInjectedClassName) { const TemplateSpecializationType *T = TL.getTypePtr(); NestedNameSpecifierLoc QualifierLoc = TL.getQualifierLoc(); TemplateName Template = getDerived().TransformTemplateName( QualifierLoc, TL.getTemplateKeywordLoc(), T->getTemplateName(), - TL.getTemplateNameLoc()); + TL.getTemplateNameLoc(), ObjectType, FirstQualifierInScope, + AllowInjectedClassName); if (Template.isNull()) return QualType(); @@ -7532,23 +7524,6 @@ QualType TreeTransform::TransformTemplateSpecializationType( NewTemplateArgs); if (!Result.isNull()) { - // Specializations of template template parameters are represented as - // TemplateSpecializationTypes, and substitution of type alias templates - // within a dependent context can transform them into - // DependentTemplateSpecializationTypes. 
- if (isa(Result)) { - DependentTemplateSpecializationTypeLoc NewTL - = TLB.push(Result); - NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc()); - NewTL.setQualifierLoc(QualifierLoc); - NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc()); - NewTL.setTemplateNameLoc(TL.getTemplateNameLoc()); - NewTL.setLAngleLoc(TL.getLAngleLoc()); - NewTL.setRAngleLoc(TL.getRAngleLoc()); - for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i) - NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo()); - return Result; - } TLB.push(Result).set( TL.getElaboratedKeywordLoc(), QualifierLoc, TL.getTemplateKeywordLoc(), TL.getTemplateNameLoc(), NewTemplateArgs); @@ -7799,83 +7774,6 @@ QualType TreeTransform::TransformDependentNameType( return Result; } -template -QualType TreeTransform::TransformDependentTemplateSpecializationType( - TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL) { - return getDerived().TransformDependentTemplateSpecializationType( - TLB, TL, QualType(), nullptr, false); -} - -template -QualType TreeTransform::TransformDependentTemplateSpecializationType( - TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL, - QualType ObjectType, NamedDecl *UnqualLookup, bool AllowInjectedClassName) { - const DependentTemplateSpecializationType *T = TL.getTypePtr(); - - NestedNameSpecifierLoc QualifierLoc = TL.getQualifierLoc(); - if (QualifierLoc) { - QualifierLoc = getDerived().TransformNestedNameSpecifierLoc( - QualifierLoc, ObjectType, UnqualLookup); - if (!QualifierLoc) - return QualType(); - // These only apply to the leftmost prefix. 
- ObjectType = QualType(); - UnqualLookup = nullptr; - } - CXXScopeSpec SS; - SS.Adopt(QualifierLoc); - - TemplateArgumentListInfo NewTemplateArgs(TL.getLAngleLoc(), - TL.getRAngleLoc()); - auto ArgsRange = llvm::make_range>({TL, 0}, {TL, TL.getNumArgs()}); - - if (getDerived().TransformTemplateArguments(ArgsRange.begin(), - ArgsRange.end(), NewTemplateArgs)) - return QualType(); - bool TemplateArgumentsChanged = !llvm::equal( - ArgsRange, NewTemplateArgs.arguments(), - [](const TemplateArgumentLoc &A, const TemplateArgumentLoc &B) { - return A.getArgument().structurallyEquals(B.getArgument()); - }); - - const DependentTemplateStorage &DTN = T->getDependentTemplateName(); - - QualType Result = TL.getType(); - if (getDerived().AlwaysRebuild() || SS.getScopeRep() != DTN.getQualifier() || - TemplateArgumentsChanged || !ObjectType.isNull()) { - TemplateName Name = getDerived().RebuildTemplateName( - SS, TL.getTemplateKeywordLoc(), DTN.getName(), TL.getTemplateNameLoc(), - ObjectType, AllowInjectedClassName); - if (Name.isNull()) - return QualType(); - Result = getDerived().RebuildDependentTemplateSpecializationType( - T->getKeyword(), TL.getTemplateKeywordLoc(), Name, - TL.getTemplateNameLoc(), NewTemplateArgs, - /*AllowInjectedClassName=*/false); - if (Result.isNull()) - return QualType(); - } - - QualifierLoc = SS.getWithLocInContext(SemaRef.Context); - if (isa(Result)) { - TLB.push(Result).set( - TL.getElaboratedKeywordLoc(), QualifierLoc, TL.getTemplateKeywordLoc(), - TL.getTemplateNameLoc(), NewTemplateArgs); - } else { - auto SpecTL = TLB.push(Result); - SpecTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc()); - SpecTL.setQualifierLoc(QualifierLoc); - SpecTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc()); - SpecTL.setTemplateNameLoc(TL.getTemplateNameLoc()); - SpecTL.setLAngleLoc(TL.getLAngleLoc()); - SpecTL.setRAngleLoc(TL.getRAngleLoc()); - for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I) - SpecTL.setArgLocInfo(I, 
NewTemplateArgs[I].getLocInfo()); - } - return Result; -} - template QualType TreeTransform::TransformPackExpansionType(TypeLocBuilder &TLB, PackExpansionTypeLoc TL) { @@ -17468,8 +17366,9 @@ template QualType TreeTransform::RebuildTemplateSpecializationType( ElaboratedTypeKeyword Keyword, TemplateName Template, SourceLocation TemplateNameLoc, TemplateArgumentListInfo &TemplateArgs) { - return SemaRef.CheckTemplateIdType(Keyword, Template, TemplateNameLoc, - TemplateArgs); + return SemaRef.CheckTemplateIdType( + Keyword, Template, TemplateNameLoc, TemplateArgs, + /*Scope=*/nullptr, /*ForNestedNameSpecifier=*/false); } template diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 1b3a8b13f1fb1..5f40e94074702 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7532,20 +7532,6 @@ void TypeLocReader::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) { TL.setNameLoc(readSourceLocation()); } -void TypeLocReader::VisitDependentTemplateSpecializationTypeLoc( - DependentTemplateSpecializationTypeLoc TL) { - TL.setElaboratedKeywordLoc(readSourceLocation()); - TL.setQualifierLoc(ReadNestedNameSpecifierLoc()); - TL.setTemplateKeywordLoc(readSourceLocation()); - TL.setTemplateNameLoc(readSourceLocation()); - TL.setLAngleLoc(readSourceLocation()); - TL.setRAngleLoc(readSourceLocation()); - for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) - TL.setArgLocInfo(I, - Reader.readTemplateArgumentLocInfo( - TL.getTypePtr()->template_arguments()[I].getKind())); -} - void TypeLocReader::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) { TL.setEllipsisLoc(readSourceLocation()); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a3a25e48f9065..15a3ed4c427f8 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -659,18 +659,6 @@ void TypeLocWriter::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) { 
addSourceLocation(TL.getNameLoc()); } -void TypeLocWriter::VisitDependentTemplateSpecializationTypeLoc( - DependentTemplateSpecializationTypeLoc TL) { - addSourceLocation(TL.getElaboratedKeywordLoc()); - Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc()); - addSourceLocation(TL.getTemplateKeywordLoc()); - addSourceLocation(TL.getTemplateNameLoc()); - addSourceLocation(TL.getLAngleLoc()); - addSourceLocation(TL.getRAngleLoc()); - for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) - Record.AddTemplateArgumentLocInfo(TL.getArgLoc(I)); -} - void TypeLocWriter::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) { addSourceLocation(TL.getEllipsisLoc()); } @@ -1058,7 +1046,6 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(TYPE_TEMPLATE_TYPE_PARM); RECORD(TYPE_TEMPLATE_SPECIALIZATION); RECORD(TYPE_DEPENDENT_NAME); - RECORD(TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION); RECORD(TYPE_DEPENDENT_SIZED_ARRAY); RECORD(TYPE_PAREN); RECORD(TYPE_MACRO_QUALIFIED); diff --git a/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp b/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp index d9444110d421c..c9108fc299cc1 100644 --- a/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp +++ b/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp @@ -157,12 +157,6 @@ SourceLocation StartLocationForType(TypeLoc TL) { return QualifierLoc.getBeginLoc(); return TTL.getNameLoc(); } - case TypeLoc::DependentTemplateSpecialization: { - auto TTL = TL.castAs(); - if (NestedNameSpecifierLoc QualifierLoc = TTL.getQualifierLoc()) - return QualifierLoc.getBeginLoc(); - return TTL.getTemplateNameLoc(); - } default: llvm_unreachable("unhandled TypeLoc class"); } diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index b75f8ff6defee..90fd1f91b9ef2 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -974,13 +974,6 @@ class BuildTreeVisitor : public RecursiveASTVisitor { BeginLoc = TST.getTemplateNameLoc(); 
return buildSimpleTemplateName({BeginLoc, TST.getEndLoc()}); } - case TypeLoc::DependentTemplateSpecialization: { - auto DT = TL.castAs(); - SourceLocation BeginLoc = DT.getTemplateKeywordLoc(); - if (BeginLoc.isInvalid()) - BeginLoc = DT.getTemplateNameLoc(); - return buildSimpleTemplateName({BeginLoc, DT.getEndLoc()}); - } case TypeLoc::Decltype: { const auto DTL = TL.castAs(); if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc( diff --git a/clang/test/AST/ast-dump-templates.cpp b/clang/test/AST/ast-dump-templates.cpp index e43fe6b1dda25..18f62e4acdc78 100644 --- a/clang/test/AST/ast-dump-templates.cpp +++ b/clang/test/AST/ast-dump-templates.cpp @@ -175,7 +175,10 @@ namespace TestDependentMemberPointer { // DUMP-NEXT: | `-BuiltinType {{.+}} 'int' // DUMP-NEXT: `-TypeAliasDecl {{.+}} Z 'int U::template V::*'{{$}} // DUMP-NEXT: `-MemberPointerType {{.+}} 'int U::template V::*' dependent -// DUMP-NEXT: |-DependentTemplateSpecializationType {{.+}} 'U::template V' dependent +// DUMP-NEXT: |-TemplateSpecializationType {{.+}} 'U::template V' dependent +// DUMP-NEXT: | |-name: 'U::template V':'type-parameter-0-0::template V' dependent +// DUMP-NEXT: | | `-NestedNameSpecifier TypeSpec 'U' +// DUMP-NEXT: | `-TemplateArgument type 'int' // DUMP-NEXT: `-BuiltinType {{.+}} 'int' } // namespace TestDependentMemberPointer @@ -237,6 +240,28 @@ namespace GH153540 { // DUMP-NEXT: CXXConstructExpr {{.*}} 'N::S':'GH153540::N::S' 'void (int)' } // namespace GH153540 +namespace AliasDependentTemplateSpecializationType { + // DUMP-LABEL: NamespaceDecl {{.*}} AliasDependentTemplateSpecializationType{{$}} + + template class TT> using T1 = TT; + template using T2 = T1; + +// DUMP: TypeAliasDecl {{.*}} T2 'T1':'T::template X' +// DUMP-NEXT: `-TemplateSpecializationType {{.*}} 'T1' sugar dependent alias +// DUMP-NEXT: |-name: 'T1':'AliasDependentTemplateSpecializationType::T1' qualified +// DUMP-NEXT: | `-TypeAliasTemplateDecl {{.*}} T1 +// DUMP-NEXT: |-TemplateArgument template 'T::template 
X':'type-parameter-0-0::template X' dependent +// DUMP-NEXT: | `-NestedNameSpecifier TypeSpec 'T' +// DUMP-NEXT: `-TemplateSpecializationType {{.*}} 'T::template X' dependent +// DUMP-NEXT: |-name: 'T::template X':'type-parameter-0-0::template X' subst index 0 final +// DUMP-NEXT: | |-parameter: TemplateTemplateParmDecl {{.*}} depth 0 index 0 TT +// DUMP-NEXT: | |-associated TypeAliasTemplate {{.*}} 'T1' +// DUMP-NEXT: | `-replacement: 'T::template X':'type-parameter-0-0::template X' dependent +// DUMP-NEXT: | `-NestedNameSpecifier TypeSpec 'T' +// DUMP-NEXT: `-TemplateArgument type 'int' +// DUMP-NEXT: `-BuiltinType {{.*}} 'int' +} // namespace + // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py @@ -6646,8 +6671,8 @@ namespace GH153540 { // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6425, -// JSON-NEXT: "line": 180, +// JSON-NEXT: "offset": 6613, +// JSON-NEXT: "line": 183, // JSON-NEXT: "col": 1, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -6961,12 +6986,30 @@ namespace GH153540 { // JSON-NEXT: "inner": [ // JSON-NEXT: { // JSON-NEXT: "id": "0x{{.*}}", -// JSON-NEXT: "kind": "DependentTemplateSpecializationType", +// JSON-NEXT: "kind": "TemplateSpecializationType", // JSON-NEXT: "type": { // JSON-NEXT: "qualType": "U::template V" // JSON-NEXT: }, // JSON-NEXT: "isDependent": true, -// JSON-NEXT: "isInstantiationDependent": true +// JSON-NEXT: "isInstantiationDependent": true, +// JSON-NEXT: "templateName": "U::template V", +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "kind": "TemplateArgument", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "int" +// JSON-NEXT: }, +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "BuiltinType", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "int" +// JSON-NEXT: } +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] // JSON-NEXT: }, // JSON-NEXT: { // JSON-NEXT: "id": 
"0x{{.*}}", @@ -6989,20 +7032,20 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NamespaceDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6478, -// JSON-NEXT: "line": 182, +// JSON-NEXT: "offset": 6666, +// JSON-NEXT: "line": 185, // JSON-NEXT: "col": 11, // JSON-NEXT: "tokLen": 19 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6468, +// JSON-NEXT: "offset": 6656, // JSON-NEXT: "col": 1, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9336, -// JSON-NEXT: "line": 222, +// JSON-NEXT: "offset": 9524, +// JSON-NEXT: "line": 225, // JSON-NEXT: "col": 1, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7013,19 +7056,19 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ClassTemplateDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6601, -// JSON-NEXT: "line": 184, +// JSON-NEXT: "offset": 6789, +// JSON-NEXT: "line": 187, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6563, +// JSON-NEXT: "offset": 6751, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6612, +// JSON-NEXT: "offset": 6800, // JSON-NEXT: "col": 52, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7036,18 +7079,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "TemplateTypeParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6579, +// JSON-NEXT: "offset": 6767, // JSON-NEXT: "col": 19, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6573, +// JSON-NEXT: "offset": 6761, // JSON-NEXT: "col": 13, // JSON-NEXT: "tokLen": 5 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6579, +// JSON-NEXT: "offset": 6767, // JSON-NEXT: "col": 19, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: } @@ 
-7061,18 +7104,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NonTypeTemplateParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6589, +// JSON-NEXT: "offset": 6777, // JSON-NEXT: "col": 29, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6584, +// JSON-NEXT: "offset": 6772, // JSON-NEXT: "col": 24, // JSON-NEXT: "tokLen": 4 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6589, +// JSON-NEXT: "offset": 6777, // JSON-NEXT: "col": 29, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: } @@ -7088,18 +7131,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6601, +// JSON-NEXT: "offset": 6789, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6594, +// JSON-NEXT: "offset": 6782, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6612, +// JSON-NEXT: "offset": 6800, // JSON-NEXT: "col": 52, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7162,18 +7205,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6601, +// JSON-NEXT: "offset": 6789, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6594, +// JSON-NEXT: "offset": 6782, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6601, +// JSON-NEXT: "offset": 6789, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: } @@ -7190,19 +7233,19 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ClassTemplateDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6655, -// JSON-NEXT: 
"line": 185, +// JSON-NEXT: "offset": 6843, +// JSON-NEXT: "line": 188, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6617, +// JSON-NEXT: "offset": 6805, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6666, +// JSON-NEXT: "offset": 6854, // JSON-NEXT: "col": 52, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7213,18 +7256,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "TemplateTypeParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6633, +// JSON-NEXT: "offset": 6821, // JSON-NEXT: "col": 19, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6627, +// JSON-NEXT: "offset": 6815, // JSON-NEXT: "col": 13, // JSON-NEXT: "tokLen": 5 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6633, +// JSON-NEXT: "offset": 6821, // JSON-NEXT: "col": 19, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: } @@ -7238,18 +7281,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NonTypeTemplateParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6643, +// JSON-NEXT: "offset": 6831, // JSON-NEXT: "col": 29, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6638, +// JSON-NEXT: "offset": 6826, // JSON-NEXT: "col": 24, // JSON-NEXT: "tokLen": 4 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6643, +// JSON-NEXT: "offset": 6831, // JSON-NEXT: "col": 29, // JSON-NEXT: "tokLen": 3 // JSON-NEXT: } @@ -7265,18 +7308,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6655, +// JSON-NEXT: "offset": 6843, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: 
"begin": { -// JSON-NEXT: "offset": 6648, +// JSON-NEXT: "offset": 6836, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6666, +// JSON-NEXT: "offset": 6854, // JSON-NEXT: "col": 52, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7339,18 +7382,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6655, +// JSON-NEXT: "offset": 6843, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6648, +// JSON-NEXT: "offset": 6836, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6655, +// JSON-NEXT: "offset": 6843, // JSON-NEXT: "col": 41, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: } @@ -7367,21 +7410,21 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ClassTemplatePartialSpecializationDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6719, -// JSON-NEXT: "line": 188, +// JSON-NEXT: "offset": 6907, +// JSON-NEXT: "line": 191, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6672, -// JSON-NEXT: "line": 187, +// JSON-NEXT: "offset": 6860, +// JSON-NEXT: "line": 190, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6753, -// JSON-NEXT: "line": 188, +// JSON-NEXT: "offset": 6941, +// JSON-NEXT: "line": 191, // JSON-NEXT: "col": 44, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7488,12 +7531,12 @@ namespace GH153540 { // JSON-NEXT: "kind": "DeclRefExpr", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6743, +// JSON-NEXT: "offset": 6931, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 
6743, +// JSON-NEXT: "offset": 6931, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7527,12 +7570,12 @@ namespace GH153540 { // JSON-NEXT: "kind": "DeclRefExpr", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6748, +// JSON-NEXT: "offset": 6936, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6748, +// JSON-NEXT: "offset": 6936, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7556,19 +7599,19 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "TemplateTypeParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6688, -// JSON-NEXT: "line": 187, +// JSON-NEXT: "offset": 6876, +// JSON-NEXT: "line": 190, // JSON-NEXT: "col": 19, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6682, +// JSON-NEXT: "offset": 6870, // JSON-NEXT: "col": 13, // JSON-NEXT: "tokLen": 5 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6688, +// JSON-NEXT: "offset": 6876, // JSON-NEXT: "col": 19, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7583,18 +7626,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NonTypeTemplateParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6697, +// JSON-NEXT: "offset": 6885, // JSON-NEXT: "col": 28, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6692, +// JSON-NEXT: "offset": 6880, // JSON-NEXT: "col": 23, // JSON-NEXT: "tokLen": 4 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6697, +// JSON-NEXT: "offset": 6885, // JSON-NEXT: "col": 28, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7611,18 +7654,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NonTypeTemplateParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6706, +// JSON-NEXT: "offset": 
6894, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6701, +// JSON-NEXT: "offset": 6889, // JSON-NEXT: "col": 32, // JSON-NEXT: "tokLen": 4 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6706, +// JSON-NEXT: "offset": 6894, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7639,19 +7682,19 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 6719, -// JSON-NEXT: "line": 188, +// JSON-NEXT: "offset": 6907, +// JSON-NEXT: "line": 191, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 6712, +// JSON-NEXT: "offset": 6900, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 6719, +// JSON-NEXT: "offset": 6907, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: } @@ -7666,21 +7709,21 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ClassTemplatePartialSpecializationDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 8035, -// JSON-NEXT: "line": 206, +// JSON-NEXT: "offset": 8223, +// JSON-NEXT: "line": 209, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 7985, -// JSON-NEXT: "line": 205, +// JSON-NEXT: "offset": 8173, +// JSON-NEXT: "line": 208, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 8069, -// JSON-NEXT: "line": 206, +// JSON-NEXT: "offset": 8257, +// JSON-NEXT: "line": 209, // JSON-NEXT: "col": 44, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7787,12 +7830,12 @@ namespace GH153540 { // JSON-NEXT: "kind": "DeclRefExpr", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// 
JSON-NEXT: "offset": 8059, +// JSON-NEXT: "offset": 8247, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 8059, +// JSON-NEXT: "offset": 8247, // JSON-NEXT: "col": 34, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7826,12 +7869,12 @@ namespace GH153540 { // JSON-NEXT: "kind": "DeclRefExpr", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 8064, +// JSON-NEXT: "offset": 8252, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 8064, +// JSON-NEXT: "offset": 8252, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7855,19 +7898,19 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "TemplateTypeParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 8004, -// JSON-NEXT: "line": 205, +// JSON-NEXT: "offset": 8192, +// JSON-NEXT: "line": 208, // JSON-NEXT: "col": 22, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 7995, +// JSON-NEXT: "offset": 8183, // JSON-NEXT: "col": 13, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 8004, +// JSON-NEXT: "offset": 8192, // JSON-NEXT: "col": 22, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7882,18 +7925,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NonTypeTemplateParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 8013, +// JSON-NEXT: "offset": 8201, // JSON-NEXT: "col": 31, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 8008, +// JSON-NEXT: "offset": 8196, // JSON-NEXT: "col": 26, // JSON-NEXT: "tokLen": 4 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 8013, +// JSON-NEXT: "offset": 8201, // JSON-NEXT: "col": 31, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7910,18 +7953,18 @@ 
namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NonTypeTemplateParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 8022, +// JSON-NEXT: "offset": 8210, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 8017, +// JSON-NEXT: "offset": 8205, // JSON-NEXT: "col": 35, // JSON-NEXT: "tokLen": 4 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 8022, +// JSON-NEXT: "offset": 8210, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 2 // JSON-NEXT: } @@ -7938,19 +7981,19 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 8035, -// JSON-NEXT: "line": 206, +// JSON-NEXT: "offset": 8223, +// JSON-NEXT: "line": 209, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 8028, +// JSON-NEXT: "offset": 8216, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 8035, +// JSON-NEXT: "offset": 8223, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: } @@ -7967,20 +8010,20 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NamespaceDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9382, -// JSON-NEXT: "line": 224, +// JSON-NEXT: "offset": 9570, +// JSON-NEXT: "line": 227, // JSON-NEXT: "col": 11, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9372, +// JSON-NEXT: "offset": 9560, // JSON-NEXT: "col": 1, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9791, -// JSON-NEXT: "line": 238, +// JSON-NEXT: "offset": 9979, +// JSON-NEXT: "line": 241, // JSON-NEXT: "col": 1, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -7991,20 +8034,20 @@ namespace GH153540 { 
// JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "NamespaceDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9456, -// JSON-NEXT: "line": 227, +// JSON-NEXT: "offset": 9644, +// JSON-NEXT: "line": 230, // JSON-NEXT: "col": 13, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9446, +// JSON-NEXT: "offset": 9634, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 9 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9507, -// JSON-NEXT: "line": 229, +// JSON-NEXT: "offset": 9695, +// JSON-NEXT: "line": 232, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8015,19 +8058,19 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ClassTemplateDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, -// JSON-NEXT: "line": 228, +// JSON-NEXT: "offset": 9680, +// JSON-NEXT: "line": 231, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9464, +// JSON-NEXT: "offset": 9652, // JSON-NEXT: "col": 5, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9502, +// JSON-NEXT: "offset": 9690, // JSON-NEXT: "col": 43, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8038,18 +8081,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "TemplateTypeParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9482, +// JSON-NEXT: "offset": 9670, // JSON-NEXT: "col": 23, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9473, +// JSON-NEXT: "offset": 9661, // JSON-NEXT: "col": 14, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9482, +// JSON-NEXT: "offset": 9670, // JSON-NEXT: "col": 23, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8064,18 +8107,18 @@ namespace GH153540 { // JSON-NEXT: "id": 
"0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9485, +// JSON-NEXT: "offset": 9673, // JSON-NEXT: "col": 26, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9502, +// JSON-NEXT: "offset": 9690, // JSON-NEXT: "col": 43, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8130,18 +8173,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9485, +// JSON-NEXT: "offset": 9673, // JSON-NEXT: "col": 26, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8154,18 +8197,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXConstructorDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8179,18 +8222,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ParmVarDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // 
JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8207,18 +8250,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ClassTemplateSpecializationDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9464, +// JSON-NEXT: "offset": 9652, // JSON-NEXT: "col": 5, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9502, +// JSON-NEXT: "offset": 9690, // JSON-NEXT: "col": 43, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8286,18 +8329,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXRecordDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9485, +// JSON-NEXT: "offset": 9673, // JSON-NEXT: "col": 26, // JSON-NEXT: "tokLen": 6 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8310,18 +8353,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXConstructorDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 
1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8337,18 +8380,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ParmVarDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8363,18 +8406,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXConstructorDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8393,18 +8436,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ParmVarDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8419,18 +8462,18 @@ 
namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXConstructorDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8449,18 +8492,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ParmVarDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8475,18 +8518,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXDestructorDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8510,18 +8553,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "FunctionTemplateDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // 
JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9464, +// JSON-NEXT: "offset": 9652, // JSON-NEXT: "col": 5, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8533,18 +8576,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "TemplateTypeParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9482, +// JSON-NEXT: "offset": 9670, // JSON-NEXT: "col": 23, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9473, +// JSON-NEXT: "offset": 9661, // JSON-NEXT: "col": 14, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9482, +// JSON-NEXT: "offset": 9670, // JSON-NEXT: "col": 23, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8559,18 +8602,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXDeductionGuideDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8585,18 +8628,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ParmVarDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // 
JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8611,18 +8654,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXDeductionGuideDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9496, +// JSON-NEXT: "offset": 9684, // JSON-NEXT: "col": 37, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8653,18 +8696,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ParmVarDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9499, +// JSON-NEXT: "offset": 9687, // JSON-NEXT: "col": 40, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9498, +// JSON-NEXT: "offset": 9686, // JSON-NEXT: "col": 39, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8681,18 +8724,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "FunctionTemplateDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9464, +// JSON-NEXT: "offset": 9652, // JSON-NEXT: "col": 5, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // 
JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8704,18 +8747,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "TemplateTypeParmDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9482, +// JSON-NEXT: "offset": 9670, // JSON-NEXT: "col": 23, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9473, +// JSON-NEXT: "offset": 9661, // JSON-NEXT: "col": 14, // JSON-NEXT: "tokLen": 8 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9482, +// JSON-NEXT: "offset": 9670, // JSON-NEXT: "col": 23, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8730,18 +8773,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "CXXDeductionGuideDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8756,18 +8799,18 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "ParmVarDecl", // JSON-NEXT: "loc": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9492, +// JSON-NEXT: "offset": 9680, // JSON-NEXT: "col": 33, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8786,20 +8829,20 @@ namespace GH153540 { // JSON-NEXT: "id": "0x{{.*}}", // JSON-NEXT: "kind": "FunctionDecl", // JSON-NEXT: "loc": { -// 
JSON-NEXT: "offset": 9516, -// JSON-NEXT: "line": 230, +// JSON-NEXT: "offset": 9704, +// JSON-NEXT: "line": 233, // JSON-NEXT: "col": 8, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9511, +// JSON-NEXT: "offset": 9699, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 4 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9537, -// JSON-NEXT: "line": 232, +// JSON-NEXT: "offset": 9725, +// JSON-NEXT: "line": 235, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8815,14 +8858,14 @@ namespace GH153540 { // JSON-NEXT: "kind": "CompoundStmt", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9520, -// JSON-NEXT: "line": 230, +// JSON-NEXT: "offset": 9708, +// JSON-NEXT: "line": 233, // JSON-NEXT: "col": 12, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9537, -// JSON-NEXT: "line": 232, +// JSON-NEXT: "offset": 9725, +// JSON-NEXT: "line": 235, // JSON-NEXT: "col": 3, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8833,13 +8876,13 @@ namespace GH153540 { // JSON-NEXT: "kind": "CXXFunctionalCastExpr", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9526, -// JSON-NEXT: "line": 231, +// JSON-NEXT: "offset": 9714, +// JSON-NEXT: "line": 234, // JSON-NEXT: "col": 5, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9532, +// JSON-NEXT: "offset": 9720, // JSON-NEXT: "col": 11, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8864,12 +8907,12 @@ namespace GH153540 { // JSON-NEXT: "kind": "CXXConstructExpr", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9526, +// JSON-NEXT: "offset": 9714, // JSON-NEXT: "col": 5, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9532, +// JSON-NEXT: "offset": 9720, // JSON-NEXT: "col": 11, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ 
-8890,12 +8933,12 @@ namespace GH153540 { // JSON-NEXT: "kind": "IntegerLiteral", // JSON-NEXT: "range": { // JSON-NEXT: "begin": { -// JSON-NEXT: "offset": 9531, +// JSON-NEXT: "offset": 9719, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: }, // JSON-NEXT: "end": { -// JSON-NEXT: "offset": 9531, +// JSON-NEXT: "offset": 9719, // JSON-NEXT: "col": 10, // JSON-NEXT: "tokLen": 1 // JSON-NEXT: } @@ -8915,6 +8958,282 @@ namespace GH153540 { // JSON-NEXT: ] // JSON-NEXT: } // JSON-NEXT: ] +// JSON-NEXT: }, +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "NamespaceDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10014, +// JSON-NEXT: "line": 243, +// JSON-NEXT: "col": 11, +// JSON-NEXT: "tokLen": 40 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10004, +// JSON-NEXT: "col": 1, +// JSON-NEXT: "tokLen": 9 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 11286, +// JSON-NEXT: "line": 263, +// JSON-NEXT: "col": 1, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "name": "AliasDependentTemplateSpecializationType", +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TypeAliasTemplateDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10179, +// JSON-NEXT: "line": 246, +// JSON-NEXT: "col": 38, +// JSON-NEXT: "tokLen": 5 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10144, +// JSON-NEXT: "col": 3, +// JSON-NEXT: "tokLen": 8 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 10196, +// JSON-NEXT: "col": 55, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "name": "T1", +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TemplateTemplateParmDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10175, +// JSON-NEXT: "col": 34, +// JSON-NEXT: 
"tokLen": 2 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10153, +// JSON-NEXT: "col": 12, +// JSON-NEXT: "tokLen": 8 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 10175, +// JSON-NEXT: "col": 34, +// JSON-NEXT: "tokLen": 2 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "name": "TT", +// JSON-NEXT: "depth": 0, +// JSON-NEXT: "index": 0, +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TemplateTypeParmDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10167, +// JSON-NEXT: "col": 26, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10162, +// JSON-NEXT: "col": 21, +// JSON-NEXT: "tokLen": 5 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 10162, +// JSON-NEXT: "col": 21, +// JSON-NEXT: "tokLen": 5 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "tagUsed": "class", +// JSON-NEXT: "depth": 1, +// JSON-NEXT: "index": 0 +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: }, +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TypeAliasDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10185, +// JSON-NEXT: "col": 44, +// JSON-NEXT: "tokLen": 2 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10179, +// JSON-NEXT: "col": 38, +// JSON-NEXT: "tokLen": 5 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 10196, +// JSON-NEXT: "col": 55, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "name": "T1", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "TT" +// JSON-NEXT: }, +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TemplateSpecializationType", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "TT" +// JSON-NEXT: }, +// JSON-NEXT: "isDependent": true, +// JSON-NEXT: 
"isInstantiationDependent": true, +// JSON-NEXT: "templateName": "TT", +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "kind": "TemplateArgument", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "int" +// JSON-NEXT: }, +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "BuiltinType", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "int" +// JSON-NEXT: } +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: }, +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TypeAliasTemplateDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10219, +// JSON-NEXT: "line": 247, +// JSON-NEXT: "col": 21, +// JSON-NEXT: "tokLen": 5 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10201, +// JSON-NEXT: "col": 3, +// JSON-NEXT: "tokLen": 8 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 10246, +// JSON-NEXT: "col": 48, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "name": "T2", +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TemplateTypeParmDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10216, +// JSON-NEXT: "col": 18, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10210, +// JSON-NEXT: "col": 12, +// JSON-NEXT: "tokLen": 5 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 10216, +// JSON-NEXT: "col": 18, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "name": "T", +// JSON-NEXT: "tagUsed": "class", +// JSON-NEXT: "depth": 0, +// JSON-NEXT: "index": 0 +// JSON-NEXT: }, +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TypeAliasDecl", +// JSON-NEXT: "loc": { +// JSON-NEXT: "offset": 10225, +// JSON-NEXT: 
"col": 27, +// JSON-NEXT: "tokLen": 2 +// JSON-NEXT: }, +// JSON-NEXT: "range": { +// JSON-NEXT: "begin": { +// JSON-NEXT: "offset": 10219, +// JSON-NEXT: "col": 21, +// JSON-NEXT: "tokLen": 5 +// JSON-NEXT: }, +// JSON-NEXT: "end": { +// JSON-NEXT: "offset": 10246, +// JSON-NEXT: "col": 48, +// JSON-NEXT: "tokLen": 1 +// JSON-NEXT: } +// JSON-NEXT: }, +// JSON-NEXT: "name": "T2", +// JSON-NEXT: "type": { +// JSON-NEXT: "desugaredQualType": "T::template X", +// JSON-NEXT: "qualType": "T1" +// JSON-NEXT: }, +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TemplateSpecializationType", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "T1" +// JSON-NEXT: }, +// JSON-NEXT: "isDependent": true, +// JSON-NEXT: "isInstantiationDependent": true, +// JSON-NEXT: "isAlias": true, +// JSON-NEXT: "templateName": "T1", +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "kind": "TemplateArgument" +// JSON-NEXT: }, +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "TemplateSpecializationType", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "T::template X" +// JSON-NEXT: }, +// JSON-NEXT: "isDependent": true, +// JSON-NEXT: "isInstantiationDependent": true, +// JSON-NEXT: "templateName": "T::template X", +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "kind": "TemplateArgument", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "int" +// JSON-NEXT: }, +// JSON-NEXT: "inner": [ +// JSON-NEXT: { +// JSON-NEXT: "id": "0x{{.*}}", +// JSON-NEXT: "kind": "BuiltinType", +// JSON-NEXT: "type": { +// JSON-NEXT: "qualType": "int" +// JSON-NEXT: } +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] +// JSON-NEXT: } +// JSON-NEXT: ] // JSON-NEXT: } // JSON-NEXT: ] // JSON-NEXT: } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 
0ed029c39885f..9526f629bda42 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1834,19 +1834,6 @@ bool CursorVisitor::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) { return VisitNestedNameSpecifierLoc(TL.getQualifierLoc()); } -bool CursorVisitor::VisitDependentTemplateSpecializationTypeLoc( - DependentTemplateSpecializationTypeLoc TL) { - if (VisitNestedNameSpecifierLoc(TL.getQualifierLoc())) - return true; - - // Visit the template arguments. - for (unsigned I = 0, N = TL.getNumArgs(); I != N; ++I) - if (VisitTemplateArgumentLoc(TL.getArgLoc(I))) - return true; - - return false; -} - bool CursorVisitor::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) { return Visit(TL.getPatternLoc()); } diff --git a/clang/tools/libclang/CXIndexDataConsumer.cpp b/clang/tools/libclang/CXIndexDataConsumer.cpp index 423dd1b25adad..932201a94cdae 100644 --- a/clang/tools/libclang/CXIndexDataConsumer.cpp +++ b/clang/tools/libclang/CXIndexDataConsumer.cpp @@ -393,8 +393,6 @@ SourceLocation CXIndexDataConsumer::CXXBasesListInfo::getBaseLoc( // TypeLoc::getNameLoc() if (auto TTL = TL.getAs()) return TTL.getNameLoc(); - if (auto TTL = TL.getAs()) - return TTL.getTemplateNameLoc(); if (auto TTL = TL.getAs()) return TTL.getTemplateNameLoc(); if (auto TTL = TL.getAs()) diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index ac40a871c0252..e7160bcf2e0c2 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -774,8 +774,8 @@ TEST_P(ImportType, ImportDependentTemplateSpecialization) { " typename A::template B a;" "};", Lang_CXX03, "", Lang_CXX03, Verifier, - classTemplateDecl(has(cxxRecordDecl(has( - fieldDecl(hasType(dependentTemplateSpecializationType()))))))); + classTemplateDecl(has(cxxRecordDecl( + has(fieldDecl(hasType(templateSpecializationType()))))))); } TEST_P(ImportType, ImportDeducedTemplateSpecialization) { diff --git 
a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index d7df9cae01f33..9692d6e6fae97 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -2031,7 +2031,7 @@ TEST_P(ASTMatchersTest, DependentTemplateSpecializationType) { typename A::template B a; }; )", - dependentTemplateSpecializationType())); + templateSpecializationType())); } TEST_P(ASTMatchersTest, RecordType) { diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 39aacdb58e694..e4544cd66f49b 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -3962,8 +3962,6 @@ TypeSystemClang::GetTypeInfo(lldb::opaque_compiler_type_t type, return 0; case clang::Type::DependentSizedExtVector: return eTypeHasChildren | eTypeIsVector; - case clang::Type::DependentTemplateSpecialization: - return eTypeIsTemplate; case clang::Type::Enum: if (pointee_or_element_clang_type) @@ -4237,8 +4235,6 @@ TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) { break; case clang::Type::DependentName: break; - case clang::Type::DependentTemplateSpecialization: - break; case clang::Type::PackExpansion: break; @@ -5108,7 +5104,6 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type, case clang::Type::SubstTemplateTypeParmPack: case clang::Type::InjectedClassName: case clang::Type::DependentName: - case clang::Type::DependentTemplateSpecialization: case clang::Type::PackExpansion: case clang::Type::ObjCObject: @@ -5277,7 +5272,6 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) { case clang::Type::SubstTemplateTypeParmPack: case clang::Type::InjectedClassName: case clang::Type::DependentName: - case clang::Type::DependentTemplateSpecialization: case clang::Type::PackExpansion: case 
clang::Type::ObjCObject: @@ -6171,8 +6165,6 @@ uint32_t TypeSystemClang::GetNumPointeeChildren(clang::QualType type) { return 0; case clang::Type::DependentName: return 1; - case clang::Type::DependentTemplateSpecialization: - return 1; case clang::Type::ObjCObject: return 0; case clang::Type::ObjCInterface: From a848008e1996f8934dee0a297975ac0e6b4200ec Mon Sep 17 00:00:00 2001 From: Dmitry Vasilyev Date: Fri, 12 Sep 2025 20:56:21 +0400 Subject: [PATCH 158/734] [lldb] Fixed UB in CPlusPlusLanguage plug-in (#158304) C++11 allows the use of Universal Character Names (UCNs) in identifiers, including function names. According to the spec the behavior of std::isalpha(ch) and std::isalnum(ch) is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char. --- .../source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp | 6 ++++-- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 277de8f444828..1f7b8d48d0fc8 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -190,14 +190,16 @@ static bool IsTrivialBasename(const llvm::StringRef &basename) { if (basename.size() <= idx) return false; // Empty string or "~" - if (!std::isalpha(basename[idx]) && basename[idx] != '_') + if (!std::isalpha(static_cast(basename[idx])) && + basename[idx] != '_') return false; // First character (after removing the possible '~'') isn't in // [A-Za-z_] // Read all characters matching [A-Za-z_0-9] ++idx; while (idx < basename.size()) { - if (!std::isalnum(basename[idx]) && basename[idx] != '_') + if (!std::isalnum(static_cast(basename[idx])) && + 
basename[idx] != '_') break; ++idx; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index 6053d042b29b1..141c5c9a2caf9 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -40,8 +40,10 @@ using namespace lldb_private::formatters; static void consumeInlineNamespace(llvm::StringRef &name) { // Delete past an inline namespace, if any: __[a-zA-Z0-9_]+:: auto scratch = name; - if (scratch.consume_front("__") && std::isalnum(scratch[0])) { - scratch = scratch.drop_while([](char c) { return std::isalnum(c); }); + if (scratch.consume_front("__") && + std::isalnum(static_cast(scratch[0]))) { + scratch = scratch.drop_while( + [](char c) { return std::isalnum(static_cast(c)); }); if (scratch.consume_front("::")) { // Successfully consumed a namespace. name = scratch; From d75b837ff4c27c8ab39a11a50ff64db3687503a7 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 12 Sep 2025 10:02:00 -0700 Subject: [PATCH 159/734] [RISCV] Support umin/umax in tryFoldSelectIntoOp (#157548) The neutral values for these are -1U, and 0 respectively. We already have good arithmetic lowerings for selects with one arm equal to these values. smin/smax are a bit harder, and will be a separate change. Somewhat surprisingly, this looks to be a net code improvement in all of the configurations. With both zbb, it's a clear win. With only zicond, we still seem to come out ahead because we reduce the number of ziconds needed (since we lower min/max to them). Without either zbb or zicond, we're a bit more of wash, but the available arithmetic sequences are good enough that doing the select unconditionally before using branches for the min/max is probably still worthwhile? 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 + llvm/test/CodeGen/RISCV/select-zbb.ll | 720 ++++++++------------ 2 files changed, 286 insertions(+), 436 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1d01de336b787..523b857f9e6cd 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -18839,6 +18839,8 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, case ISD::ADD: case ISD::OR: case ISD::XOR: + case ISD::UMIN: + case ISD::UMAX: break; } diff --git a/llvm/test/CodeGen/RISCV/select-zbb.ll b/llvm/test/CodeGen/RISCV/select-zbb.ll index 0af699aae3288..efc3f46376b4e 100644 --- a/llvm/test/CodeGen/RISCV/select-zbb.ll +++ b/llvm/test/CodeGen/RISCV/select-zbb.ll @@ -12,96 +12,80 @@ define i32 @select_umin_1(i1 zeroext %cond, i32 %a, i32 %b) { ; RV32IM-LABEL: select_umin_1: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: bgeu a1, a2, .LBB0_3 +; RV32IM-NEXT: addi a0, a0, -1 +; RV32IM-NEXT: or a1, a0, a1 +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: bltu a2, a1, .LBB0_2 ; RV32IM-NEXT: # %bb.1: # %entry -; RV32IM-NEXT: beqz a0, .LBB0_4 -; RV32IM-NEXT: .LBB0_2: # %entry ; RV32IM-NEXT: mv a0, a1 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB0_3: # %entry -; RV32IM-NEXT: mv a1, a2 -; RV32IM-NEXT: bnez a0, .LBB0_2 -; RV32IM-NEXT: .LBB0_4: # %entry -; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: .LBB0_2: # %entry ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umin_1: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: sext.w a3, a2 +; RV64IM-NEXT: mv a3, a0 +; RV64IM-NEXT: sext.w a0, a2 +; RV64IM-NEXT: addi a3, a3, -1 +; RV64IM-NEXT: or a1, a3, a1 ; RV64IM-NEXT: sext.w a1, a1 -; RV64IM-NEXT: bgeu a1, a3, .LBB0_3 +; RV64IM-NEXT: bltu a0, a1, .LBB0_2 ; RV64IM-NEXT: # %bb.1: # %entry -; RV64IM-NEXT: beqz a0, .LBB0_4 -; RV64IM-NEXT: .LBB0_2: # %entry ; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB0_3: # %entry -; RV64IM-NEXT: mv a1, a3 -; 
RV64IM-NEXT: bnez a0, .LBB0_2 -; RV64IM-NEXT: .LBB0_4: # %entry -; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: .LBB0_2: # %entry ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umin_1: ; RV32IMZBB: # %bb.0: # %entry -; RV32IMZBB-NEXT: beqz a0, .LBB0_2 -; RV32IMZBB-NEXT: # %bb.1: -; RV32IMZBB-NEXT: minu a2, a1, a2 -; RV32IMZBB-NEXT: .LBB0_2: # %entry -; RV32IMZBB-NEXT: mv a0, a2 +; RV32IMZBB-NEXT: addi a0, a0, -1 +; RV32IMZBB-NEXT: or a0, a0, a1 +; RV32IMZBB-NEXT: minu a0, a2, a0 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umin_1: ; RV64IMZBB: # %bb.0: # %entry -; RV64IMZBB-NEXT: beqz a0, .LBB0_2 -; RV64IMZBB-NEXT: # %bb.1: ; RV64IMZBB-NEXT: sext.w a2, a2 -; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: minu a2, a1, a2 -; RV64IMZBB-NEXT: .LBB0_2: # %entry -; RV64IMZBB-NEXT: mv a0, a2 +; RV64IMZBB-NEXT: addi a0, a0, -1 +; RV64IMZBB-NEXT: or a0, a0, a1 +; RV64IMZBB-NEXT: sext.w a0, a0 +; RV64IMZBB-NEXT: minu a0, a2, a0 ; RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umin_1: ; RV32IMZICOND: # %bb.0: # %entry -; RV32IMZICOND-NEXT: sltu a3, a1, a2 -; RV32IMZICOND-NEXT: czero.nez a4, a2, a3 -; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3 -; RV32IMZICOND-NEXT: or a1, a1, a4 -; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: addi a0, a0, -1 +; RV32IMZICOND-NEXT: or a0, a0, a1 +; RV32IMZICOND-NEXT: sltu a1, a2, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a0, a1 +; RV32IMZICOND-NEXT: czero.eqz a1, a2, a1 ; RV32IMZICOND-NEXT: or a0, a1, a0 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umin_1: ; RV64IMZICOND: # %bb.0: # %entry -; RV64IMZICOND-NEXT: sext.w a3, a2 -; RV64IMZICOND-NEXT: sext.w a1, a1 -; RV64IMZICOND-NEXT: sltu a4, a1, a3 -; RV64IMZICOND-NEXT: czero.nez a3, a3, a4 -; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4 -; RV64IMZICOND-NEXT: or a1, a1, a3 -; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: sext.w a2, a2 +; RV64IMZICOND-NEXT: addi a0, a0, -1 +; 
RV64IMZICOND-NEXT: or a0, a0, a1 +; RV64IMZICOND-NEXT: sext.w a0, a0 +; RV64IMZICOND-NEXT: sltu a1, a2, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a0, a1 +; RV64IMZICOND-NEXT: czero.eqz a1, a2, a1 ; RV64IMZICOND-NEXT: or a0, a1, a0 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umin_1: ; RV32IMBOTH: # %bb.0: # %entry -; RV32IMBOTH-NEXT: minu a1, a1, a2 -; RV32IMBOTH-NEXT: czero.nez a2, a2, a0 -; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0 -; RV32IMBOTH-NEXT: or a0, a0, a2 +; RV32IMBOTH-NEXT: addi a0, a0, -1 +; RV32IMBOTH-NEXT: or a0, a0, a1 +; RV32IMBOTH-NEXT: minu a0, a2, a0 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umin_1: ; RV64IMBOTH: # %bb.0: # %entry -; RV64IMBOTH-NEXT: sext.w a3, a2 -; RV64IMBOTH-NEXT: sext.w a1, a1 -; RV64IMBOTH-NEXT: minu a1, a1, a3 -; RV64IMBOTH-NEXT: czero.nez a2, a2, a0 -; RV64IMBOTH-NEXT: czero.eqz a0, a1, a0 -; RV64IMBOTH-NEXT: or a0, a0, a2 +; RV64IMBOTH-NEXT: sext.w a2, a2 +; RV64IMBOTH-NEXT: addi a0, a0, -1 +; RV64IMBOTH-NEXT: or a0, a0, a1 +; RV64IMBOTH-NEXT: sext.w a0, a0 +; RV64IMBOTH-NEXT: minu a0, a2, a0 ; RV64IMBOTH-NEXT: ret entry: %c = call i32 @llvm.umin(i32 %a, i32 %b) @@ -112,97 +96,80 @@ entry: define i32 @select_umin_2(i1 zeroext %cond, i32 %a, i32 %b) { ; RV32IM-LABEL: select_umin_2: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: mv a3, a1 -; RV32IM-NEXT: bgeu a1, a2, .LBB1_3 +; RV32IM-NEXT: neg a0, a0 +; RV32IM-NEXT: or a2, a0, a2 +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: bltu a1, a2, .LBB1_2 ; RV32IM-NEXT: # %bb.1: # %entry -; RV32IM-NEXT: beqz a0, .LBB1_4 +; RV32IM-NEXT: mv a0, a2 ; RV32IM-NEXT: .LBB1_2: # %entry -; RV32IM-NEXT: mv a0, a1 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB1_3: # %entry -; RV32IM-NEXT: mv a3, a2 -; RV32IM-NEXT: bnez a0, .LBB1_2 -; RV32IM-NEXT: .LBB1_4: # %entry -; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umin_2: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: sext.w a3, a2 -; RV64IM-NEXT: sext.w a2, a1 -; RV64IM-NEXT: bgeu a2, a3, .LBB1_3 +; RV64IM-NEXT: mv a3, a0 +; 
RV64IM-NEXT: sext.w a0, a1 +; RV64IM-NEXT: neg a1, a3 +; RV64IM-NEXT: or a1, a1, a2 +; RV64IM-NEXT: sext.w a1, a1 +; RV64IM-NEXT: bltu a0, a1, .LBB1_2 ; RV64IM-NEXT: # %bb.1: # %entry -; RV64IM-NEXT: beqz a0, .LBB1_4 -; RV64IM-NEXT: .LBB1_2: # %entry ; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB1_3: # %entry -; RV64IM-NEXT: mv a2, a3 -; RV64IM-NEXT: bnez a0, .LBB1_2 -; RV64IM-NEXT: .LBB1_4: # %entry -; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: .LBB1_2: # %entry ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umin_2: ; RV32IMZBB: # %bb.0: # %entry -; RV32IMZBB-NEXT: bnez a0, .LBB1_2 -; RV32IMZBB-NEXT: # %bb.1: # %entry -; RV32IMZBB-NEXT: minu a1, a1, a2 -; RV32IMZBB-NEXT: .LBB1_2: # %entry -; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: neg a0, a0 +; RV32IMZBB-NEXT: or a0, a0, a2 +; RV32IMZBB-NEXT: minu a0, a1, a0 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umin_2: ; RV64IMZBB: # %bb.0: # %entry -; RV64IMZBB-NEXT: bnez a0, .LBB1_2 -; RV64IMZBB-NEXT: # %bb.1: # %entry -; RV64IMZBB-NEXT: sext.w a2, a2 ; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: minu a1, a1, a2 -; RV64IMZBB-NEXT: .LBB1_2: # %entry -; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: neg a0, a0 +; RV64IMZBB-NEXT: or a0, a0, a2 +; RV64IMZBB-NEXT: sext.w a0, a0 +; RV64IMZBB-NEXT: minu a0, a1, a0 ; RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umin_2: ; RV32IMZICOND: # %bb.0: # %entry -; RV32IMZICOND-NEXT: sltu a3, a1, a2 -; RV32IMZICOND-NEXT: czero.nez a2, a2, a3 -; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3 -; RV32IMZICOND-NEXT: or a2, a3, a2 -; RV32IMZICOND-NEXT: czero.nez a2, a2, a0 -; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32IMZICOND-NEXT: neg a0, a0 ; RV32IMZICOND-NEXT: or a0, a0, a2 +; RV32IMZICOND-NEXT: sltu a2, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV32IMZICOND-NEXT: or a0, a1, a0 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umin_2: ; RV64IMZICOND: # %bb.0: # %entry -; RV64IMZICOND-NEXT: sext.w a2, 
a2 -; RV64IMZICOND-NEXT: sext.w a3, a1 -; RV64IMZICOND-NEXT: sltu a4, a3, a2 -; RV64IMZICOND-NEXT: czero.nez a2, a2, a4 -; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4 -; RV64IMZICOND-NEXT: or a2, a3, a2 -; RV64IMZICOND-NEXT: czero.nez a2, a2, a0 -; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: neg a0, a0 ; RV64IMZICOND-NEXT: or a0, a0, a2 +; RV64IMZICOND-NEXT: sext.w a0, a0 +; RV64IMZICOND-NEXT: sltu a2, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV64IMZICOND-NEXT: or a0, a1, a0 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umin_2: ; RV32IMBOTH: # %bb.0: # %entry -; RV32IMBOTH-NEXT: minu a2, a1, a2 -; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 -; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: neg a0, a0 +; RV32IMBOTH-NEXT: or a0, a0, a2 +; RV32IMBOTH-NEXT: minu a0, a1, a0 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umin_2: ; RV64IMBOTH: # %bb.0: # %entry -; RV64IMBOTH-NEXT: sext.w a2, a2 -; RV64IMBOTH-NEXT: sext.w a3, a1 -; RV64IMBOTH-NEXT: minu a2, a3, a2 -; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 -; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: neg a0, a0 +; RV64IMBOTH-NEXT: or a0, a0, a2 +; RV64IMBOTH-NEXT: sext.w a0, a0 +; RV64IMBOTH-NEXT: minu a0, a1, a0 ; RV64IMBOTH-NEXT: ret entry: %c = call i32 @llvm.umin(i32 %a, i32 %b) @@ -213,99 +180,76 @@ entry: define i32 @select_umin_3(i1 zeroext %cond, i32 %a) { ; RV32IM-LABEL: select_umin_3: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: li a3, 32 -; RV32IM-NEXT: mv a2, a1 -; RV32IM-NEXT: bgeu a1, a3, .LBB2_3 -; RV32IM-NEXT: # %bb.1: # %entry -; RV32IM-NEXT: beqz a0, .LBB2_4 -; RV32IM-NEXT: .LBB2_2: # %entry +; RV32IM-NEXT: neg a0, a0 +; RV32IM-NEXT: ori a2, a0, 32 ; RV32IM-NEXT: mv a0, a1 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB2_3: # %entry -; RV32IM-NEXT: li a2, 32 -; RV32IM-NEXT: bnez 
a0, .LBB2_2 -; RV32IM-NEXT: .LBB2_4: # %entry +; RV32IM-NEXT: bltu a1, a2, .LBB2_2 +; RV32IM-NEXT: # %bb.1: # %entry ; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: .LBB2_2: # %entry ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umin_3: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: sext.w a2, a1 -; RV64IM-NEXT: li a3, 32 -; RV64IM-NEXT: bgeu a2, a3, .LBB2_3 +; RV64IM-NEXT: mv a2, a0 +; RV64IM-NEXT: sext.w a0, a1 +; RV64IM-NEXT: neg a1, a2 +; RV64IM-NEXT: ori a1, a1, 32 +; RV64IM-NEXT: bltu a0, a1, .LBB2_2 ; RV64IM-NEXT: # %bb.1: # %entry -; RV64IM-NEXT: beqz a0, .LBB2_4 -; RV64IM-NEXT: .LBB2_2: # %entry ; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB2_3: # %entry -; RV64IM-NEXT: li a2, 32 -; RV64IM-NEXT: bnez a0, .LBB2_2 -; RV64IM-NEXT: .LBB2_4: # %entry -; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: .LBB2_2: # %entry ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umin_3: ; RV32IMZBB: # %bb.0: # %entry -; RV32IMZBB-NEXT: bnez a0, .LBB2_2 -; RV32IMZBB-NEXT: # %bb.1: # %entry -; RV32IMZBB-NEXT: li a0, 32 -; RV32IMZBB-NEXT: minu a1, a1, a0 -; RV32IMZBB-NEXT: .LBB2_2: # %entry -; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: neg a0, a0 +; RV32IMZBB-NEXT: ori a0, a0, 32 +; RV32IMZBB-NEXT: minu a0, a1, a0 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umin_3: ; RV64IMZBB: # %bb.0: # %entry -; RV64IMZBB-NEXT: bnez a0, .LBB2_2 -; RV64IMZBB-NEXT: # %bb.1: # %entry ; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: li a0, 32 -; RV64IMZBB-NEXT: minu a1, a1, a0 -; RV64IMZBB-NEXT: .LBB2_2: # %entry -; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: neg a0, a0 +; RV64IMZBB-NEXT: ori a0, a0, 32 +; RV64IMZBB-NEXT: minu a0, a1, a0 ; RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umin_3: ; RV32IMZICOND: # %bb.0: # %entry -; RV32IMZICOND-NEXT: sltiu a2, a1, 32 -; RV32IMZICOND-NEXT: addi a3, a1, -32 -; RV32IMZICOND-NEXT: czero.eqz a2, a3, a2 -; RV32IMZICOND-NEXT: addi a2, a2, 32 -; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; 
RV32IMZICOND-NEXT: neg a0, a0 +; RV32IMZICOND-NEXT: ori a0, a0, 32 +; RV32IMZICOND-NEXT: sltu a2, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2 ; RV32IMZICOND-NEXT: or a0, a1, a0 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umin_3: ; RV64IMZICOND: # %bb.0: # %entry -; RV64IMZICOND-NEXT: sext.w a2, a1 -; RV64IMZICOND-NEXT: sltiu a3, a2, 32 -; RV64IMZICOND-NEXT: addi a2, a2, -32 -; RV64IMZICOND-NEXT: czero.eqz a2, a2, a3 -; RV64IMZICOND-NEXT: addi a2, a2, 32 -; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: neg a0, a0 +; RV64IMZICOND-NEXT: ori a0, a0, 32 +; RV64IMZICOND-NEXT: sltu a2, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2 ; RV64IMZICOND-NEXT: or a0, a1, a0 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umin_3: ; RV32IMBOTH: # %bb.0: # %entry -; RV32IMBOTH-NEXT: li a2, 32 -; RV32IMBOTH-NEXT: minu a2, a1, a2 -; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 -; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: neg a0, a0 +; RV32IMBOTH-NEXT: ori a0, a0, 32 +; RV32IMBOTH-NEXT: minu a0, a1, a0 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umin_3: ; RV64IMBOTH: # %bb.0: # %entry -; RV64IMBOTH-NEXT: sext.w a2, a1 -; RV64IMBOTH-NEXT: li a3, 32 -; RV64IMBOTH-NEXT: minu a2, a2, a3 -; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 -; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: neg a0, a0 +; RV64IMBOTH-NEXT: ori a0, a0, 32 +; RV64IMBOTH-NEXT: minu a0, a1, a0 ; RV64IMBOTH-NEXT: ret entry: %c = call i32 @llvm.umin(i32 %a, i32 32) @@ -316,94 +260,80 @@ entry: define i32 @select_umin_4(i1 zeroext %cond, i32 %x) { ; RV32IM-LABEL: select_umin_4: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a2, 128 -; RV32IM-NEXT: bgeu a1, a2, .LBB3_3 +; RV32IM-NEXT: neg a0, a0 +; 
RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: li a1, 128 +; RV32IM-NEXT: bltu a0, a1, .LBB3_2 ; RV32IM-NEXT: # %bb.1: -; RV32IM-NEXT: beqz a0, .LBB3_4 +; RV32IM-NEXT: li a0, 128 ; RV32IM-NEXT: .LBB3_2: -; RV32IM-NEXT: mv a0, a2 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB3_3: -; RV32IM-NEXT: li a1, 128 -; RV32IM-NEXT: bnez a0, .LBB3_2 -; RV32IM-NEXT: .LBB3_4: -; RV32IM-NEXT: mv a0, a1 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umin_4: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: neg a0, a0 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: sext.w a0, a0 ; RV64IM-NEXT: li a1, 128 -; RV64IM-NEXT: bgeu a2, a1, .LBB3_3 +; RV64IM-NEXT: bltu a0, a1, .LBB3_2 ; RV64IM-NEXT: # %bb.1: -; RV64IM-NEXT: beqz a0, .LBB3_4 +; RV64IM-NEXT: li a0, 128 ; RV64IM-NEXT: .LBB3_2: -; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB3_3: -; RV64IM-NEXT: li a2, 128 -; RV64IM-NEXT: bnez a0, .LBB3_2 -; RV64IM-NEXT: .LBB3_4: -; RV64IM-NEXT: mv a0, a2 ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umin_4: ; RV32IMZBB: # %bb.0: -; RV32IMZBB-NEXT: mv a2, a0 -; RV32IMZBB-NEXT: li a0, 128 -; RV32IMZBB-NEXT: bnez a2, .LBB3_2 -; RV32IMZBB-NEXT: # %bb.1: -; RV32IMZBB-NEXT: minu a0, a1, a0 -; RV32IMZBB-NEXT: .LBB3_2: +; RV32IMZBB-NEXT: neg a0, a0 +; RV32IMZBB-NEXT: or a0, a0, a1 +; RV32IMZBB-NEXT: li a1, 128 +; RV32IMZBB-NEXT: minu a0, a0, a1 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umin_4: ; RV64IMZBB: # %bb.0: -; RV64IMZBB-NEXT: mv a2, a0 -; RV64IMZBB-NEXT: li a0, 128 -; RV64IMZBB-NEXT: bnez a2, .LBB3_2 -; RV64IMZBB-NEXT: # %bb.1: -; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: minu a0, a1, a0 -; RV64IMZBB-NEXT: .LBB3_2: +; RV64IMZBB-NEXT: neg a0, a0 +; RV64IMZBB-NEXT: or a0, a0, a1 +; RV64IMZBB-NEXT: sext.w a0, a0 +; RV64IMZBB-NEXT: li a1, 128 +; RV64IMZBB-NEXT: minu a0, a0, a1 ; RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umin_4: ; RV32IMZICOND: # %bb.0: -; RV32IMZICOND-NEXT: sltiu a2, a1, 128 -; RV32IMZICOND-NEXT: addi a1, a1, -128 -; RV32IMZICOND-NEXT: 
czero.eqz a1, a1, a2 -; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: neg a0, a0 +; RV32IMZICOND-NEXT: or a0, a0, a1 +; RV32IMZICOND-NEXT: sltiu a1, a0, 128 +; RV32IMZICOND-NEXT: addi a0, a0, -128 +; RV32IMZICOND-NEXT: czero.eqz a0, a0, a1 ; RV32IMZICOND-NEXT: addi a0, a0, 128 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umin_4: ; RV64IMZICOND: # %bb.0: -; RV64IMZICOND-NEXT: sext.w a1, a1 -; RV64IMZICOND-NEXT: sltiu a2, a1, 128 -; RV64IMZICOND-NEXT: addi a1, a1, -128 -; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2 -; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: neg a0, a0 +; RV64IMZICOND-NEXT: or a0, a0, a1 +; RV64IMZICOND-NEXT: sext.w a0, a0 +; RV64IMZICOND-NEXT: sltiu a1, a0, 128 +; RV64IMZICOND-NEXT: addi a0, a0, -128 +; RV64IMZICOND-NEXT: czero.eqz a0, a0, a1 ; RV64IMZICOND-NEXT: addi a0, a0, 128 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umin_4: ; RV32IMBOTH: # %bb.0: -; RV32IMBOTH-NEXT: li a2, 128 -; RV32IMBOTH-NEXT: minu a1, a1, a2 -; RV32IMBOTH-NEXT: addi a1, a1, -128 -; RV32IMBOTH-NEXT: czero.nez a0, a1, a0 -; RV32IMBOTH-NEXT: addi a0, a0, 128 +; RV32IMBOTH-NEXT: neg a0, a0 +; RV32IMBOTH-NEXT: or a0, a0, a1 +; RV32IMBOTH-NEXT: li a1, 128 +; RV32IMBOTH-NEXT: minu a0, a0, a1 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umin_4: ; RV64IMBOTH: # %bb.0: -; RV64IMBOTH-NEXT: sext.w a1, a1 -; RV64IMBOTH-NEXT: li a2, 128 -; RV64IMBOTH-NEXT: minu a1, a1, a2 -; RV64IMBOTH-NEXT: addi a1, a1, -128 -; RV64IMBOTH-NEXT: czero.nez a0, a1, a0 -; RV64IMBOTH-NEXT: addi a0, a0, 128 +; RV64IMBOTH-NEXT: neg a0, a0 +; RV64IMBOTH-NEXT: or a0, a0, a1 +; RV64IMBOTH-NEXT: sext.w a0, a0 +; RV64IMBOTH-NEXT: li a1, 128 +; RV64IMBOTH-NEXT: minu a0, a0, a1 ; RV64IMBOTH-NEXT: ret %minmax = call i32 @llvm.umin(i32 %x, i32 128) %sel = select i1 %cond, i32 128, i32 %minmax @@ -413,96 +343,76 @@ define i32 @select_umin_4(i1 zeroext %cond, i32 %x) { define i32 @select_umax_1(i1 zeroext %cond, i32 %a, i32 %b) { ; RV32IM-LABEL: 
select_umax_1: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: bgeu a2, a1, .LBB4_3 +; RV32IM-NEXT: neg a0, a0 +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: bltu a1, a2, .LBB4_2 ; RV32IM-NEXT: # %bb.1: # %entry -; RV32IM-NEXT: beqz a0, .LBB4_4 -; RV32IM-NEXT: .LBB4_2: # %entry ; RV32IM-NEXT: mv a0, a1 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB4_3: # %entry -; RV32IM-NEXT: mv a1, a2 -; RV32IM-NEXT: bnez a0, .LBB4_2 -; RV32IM-NEXT: .LBB4_4: # %entry -; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: .LBB4_2: # %entry ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umax_1: ; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: mv a3, a0 +; RV64IM-NEXT: sext.w a0, a2 +; RV64IM-NEXT: neg a2, a3 +; RV64IM-NEXT: and a1, a2, a1 ; RV64IM-NEXT: sext.w a1, a1 -; RV64IM-NEXT: sext.w a3, a2 -; RV64IM-NEXT: bgeu a3, a1, .LBB4_3 +; RV64IM-NEXT: bltu a1, a0, .LBB4_2 ; RV64IM-NEXT: # %bb.1: # %entry -; RV64IM-NEXT: beqz a0, .LBB4_4 -; RV64IM-NEXT: .LBB4_2: # %entry ; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB4_3: # %entry -; RV64IM-NEXT: mv a1, a3 -; RV64IM-NEXT: bnez a0, .LBB4_2 -; RV64IM-NEXT: .LBB4_4: # %entry -; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: .LBB4_2: # %entry ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umax_1: ; RV32IMZBB: # %bb.0: # %entry -; RV32IMZBB-NEXT: beqz a0, .LBB4_2 -; RV32IMZBB-NEXT: # %bb.1: -; RV32IMZBB-NEXT: maxu a2, a1, a2 -; RV32IMZBB-NEXT: .LBB4_2: # %entry -; RV32IMZBB-NEXT: mv a0, a2 +; RV32IMZBB-NEXT: neg a0, a0 +; RV32IMZBB-NEXT: and a0, a0, a1 +; RV32IMZBB-NEXT: maxu a0, a2, a0 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umax_1: ; RV64IMZBB: # %bb.0: # %entry -; RV64IMZBB-NEXT: beqz a0, .LBB4_2 -; RV64IMZBB-NEXT: # %bb.1: ; RV64IMZBB-NEXT: sext.w a2, a2 -; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: maxu a2, a1, a2 -; RV64IMZBB-NEXT: .LBB4_2: # %entry -; RV64IMZBB-NEXT: mv a0, a2 +; RV64IMZBB-NEXT: neg a0, a0 +; RV64IMZBB-NEXT: and a0, a0, a1 +; RV64IMZBB-NEXT: sext.w a0, a0 +; RV64IMZBB-NEXT: maxu a0, a2, a0 ; 
RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umax_1: ; RV32IMZICOND: # %bb.0: # %entry -; RV32IMZICOND-NEXT: sltu a3, a2, a1 -; RV32IMZICOND-NEXT: czero.nez a4, a2, a3 -; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3 -; RV32IMZICOND-NEXT: or a1, a1, a4 -; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32IMZICOND-NEXT: sltu a1, a0, a2 +; RV32IMZICOND-NEXT: czero.nez a0, a0, a1 +; RV32IMZICOND-NEXT: czero.eqz a1, a2, a1 ; RV32IMZICOND-NEXT: or a0, a1, a0 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umax_1: ; RV64IMZICOND: # %bb.0: # %entry -; RV64IMZICOND-NEXT: sext.w a1, a1 -; RV64IMZICOND-NEXT: sext.w a3, a2 -; RV64IMZICOND-NEXT: sltu a4, a3, a1 -; RV64IMZICOND-NEXT: czero.nez a3, a3, a4 -; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4 -; RV64IMZICOND-NEXT: or a1, a1, a3 -; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: sext.w a2, a2 +; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64IMZICOND-NEXT: sext.w a0, a0 +; RV64IMZICOND-NEXT: sltu a1, a0, a2 +; RV64IMZICOND-NEXT: czero.nez a0, a0, a1 +; RV64IMZICOND-NEXT: czero.eqz a1, a2, a1 ; RV64IMZICOND-NEXT: or a0, a1, a0 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umax_1: ; RV32IMBOTH: # %bb.0: # %entry -; RV32IMBOTH-NEXT: maxu a1, a1, a2 -; RV32IMBOTH-NEXT: czero.nez a2, a2, a0 ; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0 -; RV32IMBOTH-NEXT: or a0, a0, a2 +; RV32IMBOTH-NEXT: maxu a0, a2, a0 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umax_1: ; RV64IMBOTH: # %bb.0: # %entry -; RV64IMBOTH-NEXT: sext.w a3, a2 -; RV64IMBOTH-NEXT: sext.w a1, a1 -; RV64IMBOTH-NEXT: maxu a1, a1, a3 -; RV64IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV64IMBOTH-NEXT: sext.w a2, a2 ; RV64IMBOTH-NEXT: czero.eqz a0, a1, a0 -; RV64IMBOTH-NEXT: or a0, a0, a2 +; RV64IMBOTH-NEXT: sext.w a0, a0 +; RV64IMBOTH-NEXT: maxu a0, a2, a0 ; RV64IMBOTH-NEXT: ret entry: %c = call i32 @llvm.umax(i32 %a, i32 %b) 
@@ -513,97 +423,76 @@ entry: define i32 @select_umax_2(i1 zeroext %cond, i32 %a, i32 %b) { ; RV32IM-LABEL: select_umax_2: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: mv a3, a1 -; RV32IM-NEXT: bgeu a2, a1, .LBB5_3 +; RV32IM-NEXT: addi a0, a0, -1 +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: bltu a2, a1, .LBB5_2 ; RV32IM-NEXT: # %bb.1: # %entry -; RV32IM-NEXT: beqz a0, .LBB5_4 +; RV32IM-NEXT: mv a0, a2 ; RV32IM-NEXT: .LBB5_2: # %entry -; RV32IM-NEXT: mv a0, a1 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB5_3: # %entry -; RV32IM-NEXT: mv a3, a2 -; RV32IM-NEXT: bnez a0, .LBB5_2 -; RV32IM-NEXT: .LBB5_4: # %entry -; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umax_2: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: sext.w a3, a1 -; RV64IM-NEXT: sext.w a2, a2 -; RV64IM-NEXT: bgeu a2, a3, .LBB5_3 +; RV64IM-NEXT: mv a3, a0 +; RV64IM-NEXT: sext.w a0, a1 +; RV64IM-NEXT: addi a3, a3, -1 +; RV64IM-NEXT: and a1, a3, a2 +; RV64IM-NEXT: sext.w a1, a1 +; RV64IM-NEXT: bltu a1, a0, .LBB5_2 ; RV64IM-NEXT: # %bb.1: # %entry -; RV64IM-NEXT: beqz a0, .LBB5_4 -; RV64IM-NEXT: .LBB5_2: # %entry ; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB5_3: # %entry -; RV64IM-NEXT: mv a3, a2 -; RV64IM-NEXT: bnez a0, .LBB5_2 -; RV64IM-NEXT: .LBB5_4: # %entry -; RV64IM-NEXT: mv a0, a3 +; RV64IM-NEXT: .LBB5_2: # %entry ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umax_2: ; RV32IMZBB: # %bb.0: # %entry -; RV32IMZBB-NEXT: bnez a0, .LBB5_2 -; RV32IMZBB-NEXT: # %bb.1: # %entry -; RV32IMZBB-NEXT: maxu a1, a1, a2 -; RV32IMZBB-NEXT: .LBB5_2: # %entry -; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: addi a0, a0, -1 +; RV32IMZBB-NEXT: and a0, a0, a2 +; RV32IMZBB-NEXT: maxu a0, a1, a0 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umax_2: ; RV64IMZBB: # %bb.0: # %entry -; RV64IMZBB-NEXT: bnez a0, .LBB5_2 -; RV64IMZBB-NEXT: # %bb.1: # %entry -; RV64IMZBB-NEXT: sext.w a2, a2 ; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: maxu a1, a1, a2 -; 
RV64IMZBB-NEXT: .LBB5_2: # %entry -; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: addi a0, a0, -1 +; RV64IMZBB-NEXT: and a0, a0, a2 +; RV64IMZBB-NEXT: sext.w a0, a0 +; RV64IMZBB-NEXT: maxu a0, a1, a0 ; RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umax_2: ; RV32IMZICOND: # %bb.0: # %entry -; RV32IMZICOND-NEXT: sltu a3, a2, a1 -; RV32IMZICOND-NEXT: czero.nez a2, a2, a3 -; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3 -; RV32IMZICOND-NEXT: or a2, a3, a2 -; RV32IMZICOND-NEXT: czero.nez a2, a2, a0 -; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0 -; RV32IMZICOND-NEXT: or a0, a0, a2 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: sltu a2, a0, a1 +; RV32IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV32IMZICOND-NEXT: or a0, a1, a0 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umax_2: ; RV64IMZICOND: # %bb.0: # %entry -; RV64IMZICOND-NEXT: sext.w a3, a1 -; RV64IMZICOND-NEXT: sext.w a2, a2 -; RV64IMZICOND-NEXT: sltu a4, a2, a3 -; RV64IMZICOND-NEXT: czero.nez a2, a2, a4 -; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4 -; RV64IMZICOND-NEXT: or a2, a3, a2 -; RV64IMZICOND-NEXT: czero.nez a2, a2, a0 -; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0 -; RV64IMZICOND-NEXT: or a0, a0, a2 +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: sext.w a0, a0 +; RV64IMZICOND-NEXT: sltu a2, a0, a1 +; RV64IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV64IMZICOND-NEXT: or a0, a1, a0 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umax_2: ; RV32IMBOTH: # %bb.0: # %entry -; RV32IMBOTH-NEXT: maxu a2, a1, a2 -; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 ; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: maxu a0, a1, a0 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umax_2: ; RV64IMBOTH: # %bb.0: # %entry -; RV64IMBOTH-NEXT: sext.w a2, a2 -; RV64IMBOTH-NEXT: sext.w a3, a1 -; RV64IMBOTH-NEXT: maxu a2, a3, a2 -; 
RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: sext.w a1, a1 ; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: sext.w a0, a0 +; RV64IMBOTH-NEXT: maxu a0, a1, a0 ; RV64IMBOTH-NEXT: ret entry: %c = call i32 @llvm.umax(i32 %a, i32 %b) @@ -614,99 +503,76 @@ entry: define i32 @select_umax_3(i1 zeroext %cond, i32 %a) { ; RV32IM-LABEL: select_umax_3: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: li a3, 32 -; RV32IM-NEXT: mv a2, a1 -; RV32IM-NEXT: bgeu a3, a1, .LBB6_3 -; RV32IM-NEXT: # %bb.1: # %entry -; RV32IM-NEXT: beqz a0, .LBB6_4 -; RV32IM-NEXT: .LBB6_2: # %entry +; RV32IM-NEXT: addi a0, a0, -1 +; RV32IM-NEXT: andi a2, a0, 32 ; RV32IM-NEXT: mv a0, a1 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB6_3: # %entry -; RV32IM-NEXT: li a2, 32 -; RV32IM-NEXT: bnez a0, .LBB6_2 -; RV32IM-NEXT: .LBB6_4: # %entry +; RV32IM-NEXT: bltu a2, a1, .LBB6_2 +; RV32IM-NEXT: # %bb.1: # %entry ; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: .LBB6_2: # %entry ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umax_3: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: sext.w a2, a1 -; RV64IM-NEXT: li a3, 32 -; RV64IM-NEXT: bgeu a3, a2, .LBB6_3 +; RV64IM-NEXT: mv a2, a0 +; RV64IM-NEXT: sext.w a0, a1 +; RV64IM-NEXT: addi a2, a2, -1 +; RV64IM-NEXT: andi a1, a2, 32 +; RV64IM-NEXT: bltu a1, a0, .LBB6_2 ; RV64IM-NEXT: # %bb.1: # %entry -; RV64IM-NEXT: beqz a0, .LBB6_4 -; RV64IM-NEXT: .LBB6_2: # %entry ; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB6_3: # %entry -; RV64IM-NEXT: li a2, 32 -; RV64IM-NEXT: bnez a0, .LBB6_2 -; RV64IM-NEXT: .LBB6_4: # %entry -; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: .LBB6_2: # %entry ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umax_3: ; RV32IMZBB: # %bb.0: # %entry -; RV32IMZBB-NEXT: bnez a0, .LBB6_2 -; RV32IMZBB-NEXT: # %bb.1: # %entry -; RV32IMZBB-NEXT: li a0, 32 -; RV32IMZBB-NEXT: maxu a1, a1, a0 -; RV32IMZBB-NEXT: .LBB6_2: # %entry -; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: addi a0, a0, -1 +; RV32IMZBB-NEXT: andi a0, 
a0, 32 +; RV32IMZBB-NEXT: maxu a0, a1, a0 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umax_3: ; RV64IMZBB: # %bb.0: # %entry -; RV64IMZBB-NEXT: bnez a0, .LBB6_2 -; RV64IMZBB-NEXT: # %bb.1: # %entry ; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: li a0, 32 -; RV64IMZBB-NEXT: maxu a1, a1, a0 -; RV64IMZBB-NEXT: .LBB6_2: # %entry -; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: addi a0, a0, -1 +; RV64IMZBB-NEXT: andi a0, a0, 32 +; RV64IMZBB-NEXT: maxu a0, a1, a0 ; RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umax_3: ; RV32IMZICOND: # %bb.0: # %entry -; RV32IMZICOND-NEXT: sltiu a2, a1, 33 -; RV32IMZICOND-NEXT: addi a3, a1, -32 -; RV32IMZICOND-NEXT: czero.nez a2, a3, a2 -; RV32IMZICOND-NEXT: addi a2, a2, 32 -; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: addi a0, a0, -1 +; RV32IMZICOND-NEXT: andi a0, a0, 32 +; RV32IMZICOND-NEXT: sltu a2, a0, a1 +; RV32IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2 ; RV32IMZICOND-NEXT: or a0, a1, a0 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umax_3: ; RV64IMZICOND: # %bb.0: # %entry -; RV64IMZICOND-NEXT: sext.w a2, a1 -; RV64IMZICOND-NEXT: sltiu a3, a2, 33 -; RV64IMZICOND-NEXT: addi a2, a2, -32 -; RV64IMZICOND-NEXT: czero.nez a2, a2, a3 -; RV64IMZICOND-NEXT: addi a2, a2, 32 -; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 -; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: addi a0, a0, -1 +; RV64IMZICOND-NEXT: andi a0, a0, 32 +; RV64IMZICOND-NEXT: sltu a2, a0, a1 +; RV64IMZICOND-NEXT: czero.nez a0, a0, a2 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2 ; RV64IMZICOND-NEXT: or a0, a1, a0 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umax_3: ; RV32IMBOTH: # %bb.0: # %entry -; RV32IMBOTH-NEXT: li a2, 32 -; RV32IMBOTH-NEXT: maxu a2, a1, a2 -; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 -; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: addi 
a0, a0, -1 +; RV32IMBOTH-NEXT: andi a0, a0, 32 +; RV32IMBOTH-NEXT: maxu a0, a1, a0 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umax_3: ; RV64IMBOTH: # %bb.0: # %entry -; RV64IMBOTH-NEXT: sext.w a2, a1 -; RV64IMBOTH-NEXT: li a3, 32 -; RV64IMBOTH-NEXT: maxu a2, a2, a3 -; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 -; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 -; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: addi a0, a0, -1 +; RV64IMBOTH-NEXT: andi a0, a0, 32 +; RV64IMBOTH-NEXT: maxu a0, a1, a0 ; RV64IMBOTH-NEXT: ret entry: %c = call i32 @llvm.umax(i32 %a, i32 32) @@ -717,94 +583,76 @@ entry: define i32 @select_umax_4(i1 zeroext %cond, i32 %x) { ; RV32IM-LABEL: select_umax_4: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a2, 128 -; RV32IM-NEXT: bgeu a2, a1, .LBB7_3 +; RV32IM-NEXT: addi a0, a0, -1 +; RV32IM-NEXT: and a0, a0, a1 +; RV32IM-NEXT: li a1, 128 +; RV32IM-NEXT: bltu a1, a0, .LBB7_2 ; RV32IM-NEXT: # %bb.1: -; RV32IM-NEXT: beqz a0, .LBB7_4 +; RV32IM-NEXT: li a0, 128 ; RV32IM-NEXT: .LBB7_2: -; RV32IM-NEXT: mv a0, a2 -; RV32IM-NEXT: ret -; RV32IM-NEXT: .LBB7_3: -; RV32IM-NEXT: li a1, 128 -; RV32IM-NEXT: bnez a0, .LBB7_2 -; RV32IM-NEXT: .LBB7_4: -; RV32IM-NEXT: mv a0, a1 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_umax_4: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: addi a0, a0, -1 +; RV64IM-NEXT: and a0, a0, a1 +; RV64IM-NEXT: sext.w a0, a0 ; RV64IM-NEXT: li a1, 128 -; RV64IM-NEXT: bgeu a1, a2, .LBB7_3 +; RV64IM-NEXT: bltu a1, a0, .LBB7_2 ; RV64IM-NEXT: # %bb.1: -; RV64IM-NEXT: beqz a0, .LBB7_4 +; RV64IM-NEXT: li a0, 128 ; RV64IM-NEXT: .LBB7_2: -; RV64IM-NEXT: mv a0, a1 -; RV64IM-NEXT: ret -; RV64IM-NEXT: .LBB7_3: -; RV64IM-NEXT: li a2, 128 -; RV64IM-NEXT: bnez a0, .LBB7_2 -; RV64IM-NEXT: .LBB7_4: -; RV64IM-NEXT: mv a0, a2 ; RV64IM-NEXT: ret ; ; RV32IMZBB-LABEL: select_umax_4: ; RV32IMZBB: # %bb.0: -; RV32IMZBB-NEXT: mv a2, a0 -; RV32IMZBB-NEXT: li a0, 128 -; RV32IMZBB-NEXT: bnez a2, .LBB7_2 -; RV32IMZBB-NEXT: # 
%bb.1: -; RV32IMZBB-NEXT: maxu a0, a1, a0 -; RV32IMZBB-NEXT: .LBB7_2: +; RV32IMZBB-NEXT: addi a0, a0, -1 +; RV32IMZBB-NEXT: and a0, a0, a1 +; RV32IMZBB-NEXT: li a1, 128 +; RV32IMZBB-NEXT: maxu a0, a0, a1 ; RV32IMZBB-NEXT: ret ; ; RV64IMZBB-LABEL: select_umax_4: ; RV64IMZBB: # %bb.0: -; RV64IMZBB-NEXT: mv a2, a0 -; RV64IMZBB-NEXT: li a0, 128 -; RV64IMZBB-NEXT: bnez a2, .LBB7_2 -; RV64IMZBB-NEXT: # %bb.1: -; RV64IMZBB-NEXT: sext.w a1, a1 -; RV64IMZBB-NEXT: maxu a0, a1, a0 -; RV64IMZBB-NEXT: .LBB7_2: +; RV64IMZBB-NEXT: addi a0, a0, -1 +; RV64IMZBB-NEXT: and a0, a0, a1 +; RV64IMZBB-NEXT: sext.w a0, a0 +; RV64IMZBB-NEXT: li a1, 128 +; RV64IMZBB-NEXT: maxu a0, a0, a1 ; RV64IMZBB-NEXT: ret ; ; RV32IMZICOND-LABEL: select_umax_4: ; RV32IMZICOND: # %bb.0: -; RV32IMZICOND-NEXT: sltiu a2, a1, 129 -; RV32IMZICOND-NEXT: addi a1, a1, -128 -; RV32IMZICOND-NEXT: czero.nez a1, a1, a2 ; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: sltiu a1, a0, 129 +; RV32IMZICOND-NEXT: addi a0, a0, -128 +; RV32IMZICOND-NEXT: czero.nez a0, a0, a1 ; RV32IMZICOND-NEXT: addi a0, a0, 128 ; RV32IMZICOND-NEXT: ret ; ; RV64IMZICOND-LABEL: select_umax_4: ; RV64IMZICOND: # %bb.0: -; RV64IMZICOND-NEXT: sext.w a1, a1 -; RV64IMZICOND-NEXT: sltiu a2, a1, 129 -; RV64IMZICOND-NEXT: addi a1, a1, -128 -; RV64IMZICOND-NEXT: czero.nez a1, a1, a2 ; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: sext.w a0, a0 +; RV64IMZICOND-NEXT: sltiu a1, a0, 129 +; RV64IMZICOND-NEXT: addi a0, a0, -128 +; RV64IMZICOND-NEXT: czero.nez a0, a0, a1 ; RV64IMZICOND-NEXT: addi a0, a0, 128 ; RV64IMZICOND-NEXT: ret ; ; RV32IMBOTH-LABEL: select_umax_4: ; RV32IMBOTH: # %bb.0: -; RV32IMBOTH-NEXT: li a2, 128 -; RV32IMBOTH-NEXT: maxu a1, a1, a2 -; RV32IMBOTH-NEXT: addi a1, a1, -128 ; RV32IMBOTH-NEXT: czero.nez a0, a1, a0 -; RV32IMBOTH-NEXT: addi a0, a0, 128 +; RV32IMBOTH-NEXT: li a1, 128 +; RV32IMBOTH-NEXT: maxu a0, a0, a1 ; RV32IMBOTH-NEXT: ret ; ; RV64IMBOTH-LABEL: select_umax_4: ; RV64IMBOTH: # %bb.0: -; 
RV64IMBOTH-NEXT: sext.w a1, a1 -; RV64IMBOTH-NEXT: li a2, 128 -; RV64IMBOTH-NEXT: maxu a1, a1, a2 -; RV64IMBOTH-NEXT: addi a1, a1, -128 ; RV64IMBOTH-NEXT: czero.nez a0, a1, a0 -; RV64IMBOTH-NEXT: addi a0, a0, 128 +; RV64IMBOTH-NEXT: sext.w a0, a0 +; RV64IMBOTH-NEXT: li a1, 128 +; RV64IMBOTH-NEXT: maxu a0, a0, a1 ; RV64IMBOTH-NEXT: ret %minmax = call i32 @llvm.umax(i32 %x, i32 128) %sel = select i1 %cond, i32 128, i32 %minmax From ea24d62f107667b36a01997ccd588531e837759b Mon Sep 17 00:00:00 2001 From: CatherineMoore Date: Fri, 12 Sep 2025 13:14:11 -0400 Subject: [PATCH 160/734] Add table to track OpenMP 5.2 Support; Update status of task graph (#158322) implementation; Co-authored-by: Michael Klemm --- clang/docs/OpenMPSupport.rst | 87 +++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index cb8ea5e511101..47a8109abb21c 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -348,12 +348,97 @@ implementation. +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +.. _OpenMP 5.2 implementation details: + +OpenMP 5.2 Implementation Details +================================= + +The following table provides a quick overview of various OpenMP 5.2 features +and their implementation status. Please post on the +`Discourse forums (Runtimes - OpenMP category)`_ for more +information or if you want to help with the +implementation. 
+ ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +|Feature | C/C++ Status | Fortran Status | Reviews | ++=============================================================+===========================+===========================+==========================================================================+ +| omp_in_explicit_task() | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| semantics of explicit_task_var and implicit_task_var | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ompx sentinel for C/C++ directive extensions | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ompx prefix for clause extensions | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| if clause on teams construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| step modifier added | :none:`unclaimed` | :none:`unclaimed` | | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| memspace and traits modifiers to uses allocator i | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Add otherwise clause to metadirectives | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| doacross clause with support for omp_cur_iteration | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| position of interop_type in init clause on iterop | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| implicit map type for target enter/exit data | 
:none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| work OMPT type for work-sharing loop constructs | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| allocate and firstprivate on scope directive | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Change loop consistency for order clause | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Add memspace and traits modifiers to uses_allocators | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Keep original base pointer on map w/o matched candidate | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Pure procedure support for certain directives | :none:`N/A` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ALLOCATE 
statement support for allocators | :none:`N/A` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| dispatch construct extension to support end directive | :none:`N/A` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ + ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +|OpenMP 5.2 Deprecations | C/C++ Status | Fortran Status | Reviews | ++=============================================================+===========================+===========================+==========================================================================+ +| Linear clause syntax | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The minus operator | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Map clause modifiers without commas | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The use of allocate directives with ALLOCATE statement | :good:`N/A` | :none:`unclaimed` | | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| uses_allocators list syntax | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The default clause on metadirectives | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The delimited form of the declare target directive | :none:`unclaimed` | :good:`N/A` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The use of the to clause on the declare target directive | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The syntax of the destroy clause on the depobj construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| keyword source and sink as task-dependence modifiers | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| interop types in any position on init clause of interop | 
:none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ompd prefix usage for some ICVs | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ + .. _OpenMP 6.0 implementation details: OpenMP 6.0 Implementation Details ================================= -The following table provides a quick overview over various OpenMP 6.0 features +The following table provides a quick overview of various OpenMP 6.0 features and their implementation status. Please post on the `Discourse forums (Runtimes - OpenMP category)`_ for more information or if you want to help with the From fd58f235f8c5bd40d98acfd8e7fb11d41de301c7 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 12 Sep 2025 10:15:41 -0700 Subject: [PATCH 161/734] Revert "[SCEV] Fold (C1 * A /u C2) -> A /u (C2 /u C1), if C2 > C1." (#158328) Reverts llvm/llvm-project#157656 There are multiple reports that this is causing miscompiles in the MSan test suite after bootstrapping and that this is causing miscompiles in rustc. Let's revert for now, and work to capture a reproducer next week. 
--- llvm/lib/Analysis/ScalarEvolution.cpp | 18 +++++------------- .../Analysis/ScalarEvolution/mul-udiv-folds.ll | 2 +- .../LoopStrengthReduce/duplicated-phis.ll | 3 ++- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 5bcafd96f1aa5..a1703a270952e 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3217,26 +3217,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, } // Try to fold (C1 * D /u C2) -> C1/C2 * D, if C1 and C2 are powers-of-2, - // D is a multiple of C2, and C1 is a multiple of C2. If C2 is a multiple - // of C1, fold to (D /u (C2 /u C1)). + // D is a multiple of C2, and C1 is a multiple of C2. const SCEV *D; APInt C1V = LHSC->getAPInt(); - // (C1 * D /u C2) == -1 * -C1 * D /u C2 when C1 != INT_MIN. Don't treat -1 - // as -1 * 1, as it won't enable additional folds. - if (C1V.isNegative() && !C1V.isMinSignedValue() && !C1V.isAllOnes()) + // (C1 * D /u C2) == -1 * -C1 * D /u C2 when C1 != INT_MIN. + if (C1V.isNegative() && !C1V.isMinSignedValue()) C1V = C1V.abs(); const SCEVConstant *C2; if (C1V.isPowerOf2() && match(Ops[1], m_scev_UDiv(m_SCEV(D), m_SCEVConstant(C2))) && - C2->getAPInt().isPowerOf2() && + C2->getAPInt().isPowerOf2() && C1V.uge(C2->getAPInt()) && C1V.logBase2() <= getMinTrailingZeros(D)) { - const SCEV *NewMul; - if (C1V.uge(C2->getAPInt())) { - NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D); - } else { - assert(C1V.ugt(1) && "C1 <= 1 should have been folded earlier"); - NewMul = getUDivExpr(D, getUDivExpr(C2, getConstant(C1V))); - } + const SCEV *NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D); return C1V == LHSC->getAPInt() ? 
NewMul : getNegativeSCEV(NewMul); } } diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll index 1d34706baadeb..8dd8ec47e7090 100644 --- a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll +++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll @@ -21,7 +21,7 @@ define void @udiv4_and_udiv2(i1 %c, ptr %A) { ; CHECK-NEXT: %gep.8 = getelementptr i8, ptr %A, i64 %iv ; CHECK-NEXT: --> {(((zext i32 %start to i64) /u 4) + %A),+,1}<%loop> U: full-set S: full-set Exits: (((zext i32 %start to i64) /u 2) + %A) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep.16 = getelementptr i16, ptr %A, i64 %iv -; CHECK-NEXT: --> {(((zext i32 %start to i64) /u 2) + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((2 * ((zext i32 %start to i64) /u 4)) + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep.32 = getelementptr i32, ptr %A, i64 %iv ; CHECK-NEXT: --> {((zext i32 %start to i64) + %A),+,4}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 %start to i64)) + %A) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep.40 = getelementptr <{ i32, i8 }>, ptr %A, i64 %iv diff --git a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll index c59f7d9c2a41a..cee8c8abdb450 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll @@ -18,7 +18,8 @@ define i64 @test_duplicated_phis(i64 noundef %N) { ; CHECK: [[FOR_BODY_PREHEADER_NEW]]: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UNROLL_ITER]], -4 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw 
nsw i64 [[TMP5]], 1 ; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = sub i64 -3, [[TMP3]] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: From e5db36b604db35efe3bd3930a1f3a31bf30ef5ec Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 12 Sep 2025 10:21:40 -0700 Subject: [PATCH 162/734] [Clang] Port ulimit tests to work with internal shell Now that ulimit is implemented for the internal shell, we can make sure that the clang tests utilizing ulimit actually work. One just needs the removal of its shell requirement while the other one needs some rework to avoid bash for loops. These are writtein in Python for about the same amount of complexity. Reviewers: ilovepi, cmtice, AaronBallman, Sirraide, petrhosek Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/157977 --- clang/test/PCH/leakfiles.test | 11 +++++------ .../PR51712-large-array-constexpr-check-oom.cpp | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/clang/test/PCH/leakfiles.test b/clang/test/PCH/leakfiles.test index dc4047ac3ff48..45dc36f6708bf 100644 --- a/clang/test/PCH/leakfiles.test +++ b/clang/test/PCH/leakfiles.test @@ -1,9 +1,8 @@ // Test that compiling using a PCH doesn't leak file descriptors. // https://bugs.chromium.org/p/chromium/issues/detail?id=924225 // -// This test requires bash loops and ulimit. -// REQUIRES: shell -// UNSUPPORTED: target={{.*win32.*}} +// This test uses ulimit. +// UNSUPPORTED: system-windows // // Set up source files. lib/lib.h includes lots of lib*.h files in that dir. // client.c includes lib/lib.h, and also the individual files directly. 
@@ -12,10 +11,10 @@ // RUN: mkdir %t // RUN: cd %t // RUN: mkdir lib -// RUN: for i in {1..300}; do touch lib/lib$i.h; done -// RUN: for i in {1..300}; do echo "#include \"lib$i.h\"" >> lib/lib.h; done +// RUN: %python -c "from pathlib import Path; list(map(lambda i: Path(f'lib/lib{i}.h').touch(), range(1, 301)))" +// RUN: %python -c "for i in range(1, 301): print(f'#include \"lib{i}.h\"')" > lib/lib.h // RUN: echo "#include \"lib/lib.h\"" > client.c -// RUN: for i in {1..300}; do echo "#include \"lib/lib$i.h\"" >> client.c; done +// RUN: %python -c "for i in range(1, 301): print(f'#include \"lib/lib{i}.h\"')" > client.c // // We want to verify that we don't hold all the files open at the same time. // This is important e.g. on mac, which has a low default FD limit. diff --git a/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp index 98e1a9afae6ea..df5d8c513d514 100644 --- a/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp +++ b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp @@ -1,7 +1,6 @@ // Only run this test where ulimit is known to work well. // (There's nothing really platform-specific being tested, this is just ulimit). // -// REQUIRES: shell // REQUIRES: system-linux // UNSUPPORTED: msan // UNSUPPORTED: asan From 84f431c35b3fbd5b9c46608689f25a5d29bc0f55 Mon Sep 17 00:00:00 2001 From: Peter Rong Date: Fri, 12 Sep 2025 10:33:53 -0700 Subject: [PATCH 163/734] [DebugLine] Correct debug line emission (#157529) ### Context #99710 introduced `.loc_label` so we can terminate a line sequence. However, it did not advance PC properly. This is problematic for 1-instruction functions as it will have a zero-length sequence. 
The test checked in that PR shows the problem: ``` # CHECK-LINE-TABLE: Address Line Column File ISA Discriminator OpIndex Flags # CHECK-LINE-TABLE-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- # CHECK-LINE-TABLE-NEXT: 0x00000028: 05 DW_LNS_set_column (1) # CHECK-LINE-TABLE-NEXT: 0x0000002a: 00 DW_LNE_set_address (0x0000000000000000) # CHECK-LINE-TABLE-NEXT: 0x00000035: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000000 1 1 1 0 0 0 is_stmt # CHECK-LINE-TABLE-NEXT: 0x00000036: 00 DW_LNE_end_sequence # CHECK-LINE-TABLE-NEXT: 0x0000000000000000 1 1 1 0 0 0 is_stmt end_sequence ``` Both rows having PC 0x0 is incorrect, and parsers won't be able to parse them. See more explanation why this is wrong in #154851. ### Design This PR attempts to fix this by advancing the PC to the next available Label, and advance to the end of the section if no Label is available. ### Implementation - `emitDwarfLineEndEntry` will advance PC to the `CurrLabel` - If `CurrLabel` is null, it's probably a fake LineEntry we introduced in #110192. In that case look for the next Label - If still no label can be found, use `null` and `emitDwarfLineEndEntry` is smart enough to advance PC to the end of the section - Rename `LastLabel` to `PrevLabel`, "last" can mean "previous" or "final", this is ambiguous. - Updated the tests to emit a correct label. ### Note This fix should render #154986 and #154851 obsolete, they were temporary fixes and don't resolve the root cause. 
--------- Signed-off-by: Peter Rong --- llvm/lib/MC/MCDwarf.cpp | 30 ++++-- llvm/test/DebugInfo/ARM/stmt_seq_macho.test | 98 +++++++++++++++++++ .../X86/DW_AT_LLVM_stmt_seq_sec_offset.ll | 29 +++--- llvm/test/MC/ELF/debug-loc-label.s | 54 +++++----- 4 files changed, 162 insertions(+), 49 deletions(-) create mode 100644 llvm/test/DebugInfo/ARM/stmt_seq_macho.test diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index e7c0d37e8f99b..e8f000a584839 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -181,7 +181,7 @@ void MCDwarfLineTable::emitOne( unsigned FileNum, LastLine, Column, Flags, Isa, Discriminator; bool IsAtStartSeq; - MCSymbol *LastLabel; + MCSymbol *PrevLabel; auto init = [&]() { FileNum = 1; LastLine = 1; @@ -189,21 +189,31 @@ void MCDwarfLineTable::emitOne( Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0; Isa = 0; Discriminator = 0; - LastLabel = nullptr; + PrevLabel = nullptr; IsAtStartSeq = true; }; init(); // Loop through each MCDwarfLineEntry and encode the dwarf line number table. bool EndEntryEmitted = false; - for (const MCDwarfLineEntry &LineEntry : LineEntries) { - MCSymbol *Label = LineEntry.getLabel(); + for (auto It = LineEntries.begin(); It != LineEntries.end(); ++It) { + auto LineEntry = *It; + MCSymbol *CurrLabel = LineEntry.getLabel(); const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo(); if (LineEntry.LineStreamLabel) { if (!IsAtStartSeq) { - MCOS->emitDwarfLineEndEntry(Section, LastLabel, - /*EndLabel =*/LastLabel); + auto *Label = CurrLabel; + auto NextIt = It + 1; + // LineEntry with a null Label is probably a fake LineEntry we added + // when `-emit-func-debug-line-table-offsets` in order to terminate the + // sequence. Look for the next Label if possible, otherwise we will set + // the PC to the end of the section. 
+ if (!Label && NextIt != LineEntries.end()) { + Label = NextIt->getLabel(); + } + MCOS->emitDwarfLineEndEntry(Section, PrevLabel, + /*EndLabel =*/Label); init(); } MCOS->emitLabel(LineEntry.LineStreamLabel, LineEntry.StreamLabelDefLoc); @@ -211,7 +221,7 @@ void MCDwarfLineTable::emitOne( } if (LineEntry.IsEndEntry) { - MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, Label, + MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, PrevLabel, CurrLabel, asmInfo->getCodePointerSize()); init(); EndEntryEmitted = true; @@ -258,12 +268,12 @@ void MCDwarfLineTable::emitOne( // At this point we want to emit/create the sequence to encode the delta in // line numbers and the increment of the address from the previous Label // and the current Label. - MCOS->emitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label, + MCOS->emitDwarfAdvanceLineAddr(LineDelta, PrevLabel, CurrLabel, asmInfo->getCodePointerSize()); Discriminator = 0; LastLine = LineEntry.getLine(); - LastLabel = Label; + PrevLabel = CurrLabel; IsAtStartSeq = false; } @@ -273,7 +283,7 @@ void MCDwarfLineTable::emitOne( // does not track ranges nor terminate the line table. In that case, // conservatively use the section end symbol to end the line table. 
if (!EndEntryEmitted && !IsAtStartSeq) - MCOS->emitDwarfLineEndEntry(Section, LastLabel); + MCOS->emitDwarfLineEndEntry(Section, PrevLabel); } void MCDwarfLineTable::endCurrentSeqAndEmitLineStreamLabel(MCStreamer *MCOS, diff --git a/llvm/test/DebugInfo/ARM/stmt_seq_macho.test b/llvm/test/DebugInfo/ARM/stmt_seq_macho.test new file mode 100644 index 0000000000000..f0874bfc45ed2 --- /dev/null +++ b/llvm/test/DebugInfo/ARM/stmt_seq_macho.test @@ -0,0 +1,98 @@ +// RUN: split-file %s %t + +// RUN: clang++ --target=arm64-apple-macos11 \ +// RUN: %t/stmt_seq_macho.cpp -o %t/stmt_seq_macho.o \ +// RUN: -g -Oz -gdwarf-4 -c -mno-outline \ +// RUN: -mllvm -emit-func-debug-line-table-offsets \ +// RUN: -fdebug-compilation-dir=/private/tmp/stmt_seq \ +// RUN: -fno-unwind-tables -fno-exceptions + +// RUN: llvm-dwarfdump -all %t/stmt_seq_macho.o | FileCheck %s + +// CHECK: Address Line Column File ISA Discriminator OpIndex Flags +// CHECK-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- +// CHECK-NEXT: 0x0000000000000000 2 33 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000004 2 33 1 0 0 0 is_stmt end_sequence +// CHECK-NEXT: 0x0000000000000004 3 33 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000008 3 33 1 0 0 0 is_stmt end_sequence +// CHECK-NEXT: 0x0000000000000008 4 33 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x000000000000000c 4 33 1 0 0 0 is_stmt end_sequence +// CHECK-NEXT: 0x000000000000000c 7 10 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000010 7 3 1 0 0 0 +// CHECK-NEXT: 0x0000000000000014 7 3 1 0 0 0 end_sequence +// CHECK-NEXT: 0x0000000000000014 12 14 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000018 12 5 1 0 0 0 +// CHECK-NEXT: 0x000000000000001c 12 5 1 0 0 0 end_sequence +// CHECK-NEXT: 0x000000000000001c 16 14 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000020 16 5 1 0 0 0 +// CHECK-NEXT: 0x0000000000000024 16 5 1 0 0 0 end_sequence +// CHECK-NEXT: 
0x0000000000000024 21 14 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000028 21 5 1 0 0 0 +// CHECK-NEXT: 0x000000000000002c 21 5 1 0 0 0 end_sequence +// CHECK-NEXT: 0x000000000000002c 25 20 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000030 26 5 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000034 26 5 1 0 0 0 is_stmt end_sequence +// CHECK-NEXT: 0x0000000000000034 37 0 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000044 39 12 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x0000000000000050 40 12 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000058 40 9 1 0 0 0 is_stmt +// CHECK-NEXT: 0x000000000000005c 41 12 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000068 42 12 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000070 41 9 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000074 46 18 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000084 42 9 1 0 0 0 is_stmt +// CHECK-NEXT: 0x0000000000000088 47 5 1 0 0 0 is_stmt epilogue_begin +// CHECK-NEXT: 0x0000000000000098 47 5 1 0 0 0 is_stmt end_sequence +// CHECK-NEXT: 0x0000000000000098 34 85 1 0 0 0 is_stmt prologue_end +// CHECK-NEXT: 0x000000000000009c 34 85 1 0 0 0 is_stmt end_sequence +// CHECK-NEXT: 0x000000000000009c 34 86 1 0 0 0 is_stmt prologue_end + +#--- stmt_seq_macho.cpp +#define ATTRIB extern "C" __attribute__((noinline)) +ATTRIB void function_empty_1() {} +ATTRIB void function_empty_2() {} +ATTRIB void function_empty_3() {} + +ATTRIB int function1_copy1(int a) { + return ++a; +} + +ATTRIB int function3_copy1(int a) { + int b = a + 3; + return b + 1; +} + +ATTRIB int function2_copy1(int a) { + return a - 22; +} + +ATTRIB int function3_copy2(int a) { + int b = a + 3; + return b + 1; +} + +ATTRIB int function2_copy2(int a) { + int result = a - 22; + return result; +} + +struct logic_error { + logic_error(const char* s) {} +}; + +struct length_error : public logic_error { + __attribute__((noinline)) explicit length_error(const char* s) : logic_error(s) {} +}; + +int main() { + int sum = 0; + sum += 
function2_copy2(3); + sum += function3_copy2(41); + sum += function2_copy1(11); + sum += function1_copy1(42); + function_empty_1(); + function_empty_2(); + function_empty_3(); + length_error e("test"); + return sum; +} diff --git a/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll b/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll index 58f6495924b90..f17c6e5429b6b 100644 --- a/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll +++ b/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll @@ -14,7 +14,7 @@ ; STMT_SEQ: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000043) ; STMT_SEQ: DW_AT_name {{.*}}func01 ; STMT_SEQ: DW_TAG_subprogram [[[ABBREV_CODE2]]] -; STMT_SEQ: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000056) +; STMT_SEQ: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000058) ; STMT_SEQ: DW_AT_name {{.*}}main ;; Check the entire line sequence to see that it's correct @@ -29,22 +29,23 @@ ; STMT_SEQ-NEXT: 0x00000050: 05 DW_LNS_set_column (3) ; STMT_SEQ-NEXT: 0x00000052: 67 address += 6, line += 1, op-index += 0 ; STMT_SEQ-NEXT: 0x0000000000000006 6 3 0 0 0 0 is_stmt -; STMT_SEQ-NEXT: 0x00000053: 00 DW_LNE_end_sequence -; STMT_SEQ-NEXT: 0x0000000000000006 6 3 0 0 0 0 is_stmt end_sequence -; STMT_SEQ-NEXT: 0x00000056: 04 DW_LNS_set_file (0) -; STMT_SEQ-NEXT: 0x00000058: 00 DW_LNE_set_address (0x00000008) -; STMT_SEQ-NEXT: 0x0000005f: 03 DW_LNS_advance_line (10) -; STMT_SEQ-NEXT: 0x00000061: 01 DW_LNS_copy +; STMT_SEQ-NEXT: 0x00000053: 02 DW_LNS_advance_pc (addr += 2, op-index += 0) +; STMT_SEQ-NEXT: 0x00000055: 00 DW_LNE_end_sequence +; STMT_SEQ-NEXT: 0x0000000000000008 6 3 0 0 0 0 is_stmt end_sequence +; STMT_SEQ-NEXT: 0x00000058: 04 DW_LNS_set_file (0) +; STMT_SEQ-NEXT: 0x0000005a: 00 DW_LNE_set_address (0x00000008) +; STMT_SEQ-NEXT: 0x00000061: 03 DW_LNS_advance_line (10) +; STMT_SEQ-NEXT: 0x00000063: 01 DW_LNS_copy ; STMT_SEQ-NEXT: 0x0000000000000008 10 0 0 0 0 0 is_stmt -; STMT_SEQ-NEXT: 0x00000062: 05 
DW_LNS_set_column (10) -; STMT_SEQ-NEXT: 0x00000064: 0a DW_LNS_set_prologue_end -; STMT_SEQ-NEXT: 0x00000065: 83 address += 8, line += 1, op-index += 0 +; STMT_SEQ-NEXT: 0x00000064: 05 DW_LNS_set_column (10) +; STMT_SEQ-NEXT: 0x00000066: 0a DW_LNS_set_prologue_end +; STMT_SEQ-NEXT: 0x00000067: 83 address += 8, line += 1, op-index += 0 ; STMT_SEQ-NEXT: 0x0000000000000010 11 10 0 0 0 0 is_stmt prologue_end -; STMT_SEQ-NEXT: 0x00000066: 05 DW_LNS_set_column (3) -; STMT_SEQ-NEXT: 0x00000068: 9f address += 10, line += 1, op-index += 0 +; STMT_SEQ-NEXT: 0x00000068: 05 DW_LNS_set_column (3) +; STMT_SEQ-NEXT: 0x0000006a: 9f address += 10, line += 1, op-index += 0 ; STMT_SEQ-NEXT: 0x000000000000001a 12 3 0 0 0 0 is_stmt -; STMT_SEQ-NEXT: 0x00000069: 02 DW_LNS_advance_pc (addr += 5, op-index += 0) -; STMT_SEQ-NEXT: 0x0000006b: 00 DW_LNE_end_sequence +; STMT_SEQ-NEXT: 0x0000006b: 02 DW_LNS_advance_pc (addr += 5, op-index += 0) +; STMT_SEQ-NEXT: 0x0000006d: 00 DW_LNE_end_sequence ; STMT_SEQ-NEXT: 0x000000000000001f 12 3 0 0 0 0 is_stmt end_sequence ; generated from: diff --git a/llvm/test/MC/ELF/debug-loc-label.s b/llvm/test/MC/ELF/debug-loc-label.s index 6b5d04777bef4..4200b1192107b 100644 --- a/llvm/test/MC/ELF/debug-loc-label.s +++ b/llvm/test/MC/ELF/debug-loc-label.s @@ -17,43 +17,47 @@ # CHECK-LINE-TABLE-NEXT: 0x0000002a: 00 DW_LNE_set_address (0x0000000000000000) # CHECK-LINE-TABLE-NEXT: 0x00000035: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000000 1 1 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x00000036: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000000 1 1 1 0 0 0 is_stmt end_sequence -# CHECK-LINE-TABLE-NEXT: 0x00000039: 05 DW_LNS_set_column (2) -# CHECK-LINE-TABLE-NEXT: 0x0000003b: 00 DW_LNE_set_address (0x0000000000000008) -# CHECK-LINE-TABLE-NEXT: 0x00000046: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x00000036: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) +# CHECK-LINE-TABLE-NEXT: 0x00000038: 00 DW_LNE_end_sequence +# 
CHECK-LINE-TABLE-NEXT: 0x0000000000000008 1 1 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000003b: 05 DW_LNS_set_column (2) +# CHECK-LINE-TABLE-NEXT: 0x0000003d: 00 DW_LNE_set_address (0x0000000000000008) +# CHECK-LINE-TABLE-NEXT: 0x00000048: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000008 1 2 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x00000047: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000008 1 2 1 0 0 0 is_stmt end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000004a: 05 DW_LNS_set_column (3) -# CHECK-LINE-TABLE-NEXT: 0x0000004c: 00 DW_LNE_set_address (0x0000000000000010) -# CHECK-LINE-TABLE-NEXT: 0x00000057: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x00000049: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) +# CHECK-LINE-TABLE-NEXT: 0x0000004b: 00 DW_LNE_end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000000000000010 1 2 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000004e: 05 DW_LNS_set_column (3) +# CHECK-LINE-TABLE-NEXT: 0x00000050: 00 DW_LNE_set_address (0x0000000000000010) +# CHECK-LINE-TABLE-NEXT: 0x0000005b: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000010 1 3 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x00000058: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000010 1 3 1 0 0 0 is_stmt end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000005b: 05 DW_LNS_set_column (4) -# CHECK-LINE-TABLE-NEXT: 0x0000005d: 00 DW_LNE_set_address (0x0000000000000018) -# CHECK-LINE-TABLE-NEXT: 0x00000068: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x0000005c: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) +# CHECK-LINE-TABLE-NEXT: 0x0000005e: 00 DW_LNE_end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 3 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x00000061: 05 DW_LNS_set_column (4) +# CHECK-LINE-TABLE-NEXT: 0x00000063: 00 DW_LNE_set_address (0x0000000000000018) +# CHECK-LINE-TABLE-NEXT: 0x0000006e: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 4 1 0 0 0 is_stmt -# 
CHECK-LINE-TABLE-NEXT: 0x00000069: 05 DW_LNS_set_column (5) -# CHECK-LINE-TABLE-NEXT: 0x0000006b: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x0000006f: 05 DW_LNS_set_column (5) +# CHECK-LINE-TABLE-NEXT: 0x00000071: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 5 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x0000006c: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 5 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x00000072: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) +# CHECK-LINE-TABLE-NEXT: 0x00000074: 00 DW_LNE_end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000000000000020 1 5 1 0 0 0 is_stmt end_sequence # CHECK-SYM: Symbol table '.symtab' contains 9 entries: # CHECK-SYM-NEXT: Num: Value Size Type Bind Vis Ndx Name # CHECK-SYM-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND # CHECK-SYM-NEXT: 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS test.c # CHECK-SYM-NEXT: 2: 0000000000000000 0 SECTION LOCAL DEFAULT 2 .text -# CHECK-SYM-NEXT: 3: 0000000000000039 0 NOTYPE LOCAL DEFAULT 3 my_label_02 -# CHECK-SYM-NEXT: 4: 000000000000004a 0 NOTYPE LOCAL DEFAULT 3 my_label_03 -# CHECK-SYM-NEXT: 5: 000000000000005b 0 NOTYPE LOCAL DEFAULT 3 my_label_04 -# CHECK-SYM-NEXT: 6: 000000000000004a 0 NOTYPE LOCAL DEFAULT 3 my_label_03.1 -# CHECK-SYM-NEXT: 7: 000000000000006f 0 NOTYPE LOCAL DEFAULT 3 my_label_05 +# CHECK-SYM-NEXT: 3: 000000000000003b 0 NOTYPE LOCAL DEFAULT 3 my_label_02 +# CHECK-SYM-NEXT: 4: 000000000000004e 0 NOTYPE LOCAL DEFAULT 3 my_label_03 +# CHECK-SYM-NEXT: 5: 0000000000000061 0 NOTYPE LOCAL DEFAULT 3 my_label_04 +# CHECK-SYM-NEXT: 6: 000000000000004e 0 NOTYPE LOCAL DEFAULT 3 my_label_03.1 +# CHECK-SYM-NEXT: 7: 0000000000000077 0 NOTYPE LOCAL DEFAULT 3 my_label_05 # CHECK-SYM-NEXT: 8: 0000000000000000 0 FUNC GLOBAL DEFAULT 2 foo -# CHECK-OFFSETS: 0000 39000000 4a000000 5b000000 +# CHECK-OFFSETS: 0000 3b000000 4e000000 61000000 .text .file "test.c" From 330068a74bfb6333f9016e3c4053eeaf4989d601 Mon Sep 17 00:00:00 2001 
From: Aiden Grossman Date: Fri, 12 Sep 2025 17:53:17 +0000 Subject: [PATCH 164/734] Revert "[lit] Implement ulimit builtin" This reverts commit 615d07ea55ea57afab0205aa739239070448a038. This was causing some MacOS buildbot failures. --- llvm/utils/lit/lit/TestRunner.py | 38 +------------------ .../builtin_commands/_launch_with_limit.py | 25 ------------ .../lit/tests/Inputs/shtest-ulimit/lit.cfg | 8 ---- .../Inputs/shtest-ulimit/print_limits.py | 4 -- .../Inputs/shtest-ulimit/ulimit-bad-arg.txt | 1 - .../Inputs/shtest-ulimit/ulimit_okay.txt | 5 --- llvm/utils/lit/tests/shtest-ulimit.py | 18 --------- 7 files changed, 1 insertion(+), 98 deletions(-) delete mode 100644 llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py delete mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg delete mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py delete mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt delete mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt delete mode 100644 llvm/utils/lit/tests/shtest-ulimit.py diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 90c2c6479b004..a769919558a47 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -92,12 +92,11 @@ class ShellEnvironment(object): we maintain a dir stack for pushd/popd. 
""" - def __init__(self, cwd, env, umask=-1, ulimit={}): + def __init__(self, cwd, env, umask=-1): self.cwd = cwd self.env = dict(env) self.umask = umask self.dirStack = [] - self.ulimit = ulimit def change_dir(self, newdir): if os.path.isabs(newdir): @@ -596,27 +595,6 @@ def executeBuiltinUmask(cmd, shenv): return ShellCommandResult(cmd, "", "", 0, False) -def executeBuiltinUlimit(cmd, shenv): - """executeBuiltinUlimit - Change the current limits.""" - if os.name != "posix": - raise InternalShellError(cmd, "'ulimit' not supported on this system") - if len(cmd.args) != 3: - raise InternalShellError(cmd, "'ulimit' requires two arguments") - try: - new_limit = int(cmd.args[2]) - except ValueError as err: - raise InternalShellError(cmd, "Error: 'ulimit': %s" % str(err)) - if cmd.args[1] == "-v": - shenv.ulimit["RLIMIT_AS"] = new_limit * 1024 - elif cmd.args[1] == "-n": - shenv.ulimit["RLIMIT_NOFILE"] = new_limit - else: - raise InternalShellError( - cmd, "'ulimit' does not support option: %s" % cmd.args[1] - ) - return ShellCommandResult(cmd, "", "", 0, False) - - def executeBuiltinColon(cmd, cmd_shenv): """executeBuiltinColon - Discard arguments and exit with status 0.""" return ShellCommandResult(cmd, "", "", 0, False) @@ -771,7 +749,6 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): "popd": executeBuiltinPopd, "pushd": executeBuiltinPushd, "rm": executeBuiltinRm, - "ulimit": executeBuiltinUlimit, "umask": executeBuiltinUmask, ":": executeBuiltinColon, } @@ -937,19 +914,6 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): if kIsWindows: args = quote_windows_command(args) - # Handle any resource limits. We do this by launching the command with - # a wrapper that sets the necessary limits. We use a wrapper rather than - # setting the limits in process as we cannot reraise the limits back to - # their defaults without elevated permissions. 
- if cmd_shenv.ulimit: - executable = sys.executable - args.insert(0, sys.executable) - args.insert(1, os.path.join(builtin_commands_dir, "_launch_with_limit.py")) - for limit in cmd_shenv.ulimit: - cmd_shenv.env["LIT_INTERNAL_ULIMIT_" + limit] = str( - cmd_shenv.ulimit[limit] - ) - try: # TODO(boomanaiden154): We currently wrap the subprocess.Popen with # os.umask as the umask argument in subprocess.Popen is not diff --git a/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py deleted file mode 100644 index 33d2d59ff0dbe..0000000000000 --- a/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys -import subprocess -import resource -import os - -ULIMIT_ENV_VAR_PREFIX = "LIT_INTERNAL_ULIMIT_" - - -def main(argv): - command_args = argv[1:] - for env_var in os.environ: - if env_var.startswith(ULIMIT_ENV_VAR_PREFIX): - limit_str = env_var[len(ULIMIT_ENV_VAR_PREFIX) :] - limit_value = int(os.environ[env_var]) - limit = (limit_value, limit_value) - if limit_str == "RLIMIT_AS": - resource.setrlimit(resource.RLIMIT_AS, limit) - elif limit_str == "RLIMIT_NOFILE": - resource.setrlimit(resource.RLIMIT_NOFILE, limit) - process_output = subprocess.run(command_args) - sys.exit(process_output.returncode) - - -if __name__ == "__main__": - main(sys.argv) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg deleted file mode 100644 index c7bdc7e7b6bc0..0000000000000 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg +++ /dev/null @@ -1,8 +0,0 @@ -import lit.formats - -config.name = "shtest-ulimit" -config.suffixes = [".txt"] -config.test_format = lit.formats.ShTest(execute_external=False) -config.test_source_root = None -config.test_exec_root = None -config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py 
b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py deleted file mode 100644 index 632f954fa8fde..0000000000000 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py +++ /dev/null @@ -1,4 +0,0 @@ -import resource - -print("RLIMIT_AS=" + str(resource.getrlimit(resource.RLIMIT_AS)[0])) -print("RLIMIT_NOFILE=" + str(resource.getrlimit(resource.RLIMIT_NOFILE)[0])) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt deleted file mode 100644 index efa22881047e9..0000000000000 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt +++ /dev/null @@ -1 +0,0 @@ -# RUN: ulimit -n diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt deleted file mode 100644 index ad353b5d7c459..0000000000000 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt +++ /dev/null @@ -1,5 +0,0 @@ -# RUN: ulimit -v 1048576 -# RUN: ulimit -n 50 -# RUN: %{python} %S/print_limits.py -# Fail the test so that we can assert on the output. -# RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py deleted file mode 100644 index 8d7f436dc8af2..0000000000000 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ /dev/null @@ -1,18 +0,0 @@ -# Check the ulimit command - -# ulimit does not work on non-POSIX platforms. 
-# UNSUPPORTED: system-windows - -# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s - -# CHECK: -- Testing: 2 tests{{.*}} - -# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}}) -# CHECK: ulimit -n -# CHECK: 'ulimit' requires two arguments - -# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) -# CHECK: ulimit -v 1048576 -# CHECK: ulimit -n 50 -# CHECK: RLIMIT_AS=1073741824 -# CHECK: RLIMIT_NOFILE=50 From 8eba28bc8ce9447d09edda6fc79e2191a1669252 Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Fri, 12 Sep 2025 10:57:20 -0700 Subject: [PATCH 165/734] [mlir][NFC] Correct pattern names to match the behaviors. (#158177) It is a follow-up for https://github.com/llvm/llvm-project/pull/131982#discussion_r2286014576 and https://github.com/llvm/llvm-project/pull/126898#discussion_r2286013250. The names do not match the behaviors, and the revision updates the names. Signed-off-by: hanhanW --- mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp index dfce835a1954b..7ec61c7df81cf 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp @@ -319,7 +319,7 @@ struct BubbleUpExpandThroughParallelCollapse /// Note - this pattern could be extended to be a swap pattern between /// `tensor.expand_shape` and `tensor.extract_slice`, but is currently /// implemented only as a bubble up pattern for `tensor.extract_slice`. -struct BubbleUpExpandShapeThroughExtractSlice +struct BubbleUpExtractSliceThroughExpandShape : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -427,7 +427,7 @@ struct BubbleUpExpandShapeThroughExtractSlice /// to tensor<15xf32> /// ``` /// But this is not the intended purpose of the transformation. 
-struct BubbleUpCollapseShapeThroughExtractSlice +struct BubbleUpExtractSliceThroughCollapseShape : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -735,6 +735,6 @@ void mlir::tensor::populateBubbleUpExpandShapePatterns( void mlir::tensor::populateBubbleUpExtractSliceOpPatterns( RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); + patterns.add(patterns.getContext()); } From 370607065d65d4cd65bf455fcf2de12576d8e272 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Fri, 12 Sep 2025 19:30:11 +0200 Subject: [PATCH 166/734] [llvm] Regenerate test checks including TBAA semantics (NFC) Tests exercizing TBAA metadata (both purposefully and not), and previously generated via UTC, have been regenerated and updated to version 6. --- .../Analysis/TypeBasedAliasAnalysis/dse.ll | 45 +- .../gvn-nonlocal-type-mismatch.ll | 50 +- .../TypeBasedAliasAnalysis/memcpyopt.ll | 21 +- .../Bitcode/upgrade-masked-keep-metadata.ll | 44 +- .../test/DebugInfo/unrolled-loop-remainder.ll | 102 +- .../vector-track-origins-neon.ll | 10 +- .../TypeSanitizer/access-with-offset.ll | 19 +- .../Instrumentation/TypeSanitizer/anon.ll | 152 +- .../TypeSanitizer/basic-nosan.ll | 43 +- .../Instrumentation/TypeSanitizer/basic.ll | 106 +- .../TypeSanitizer/nosanitize.ll | 13 +- .../ArgumentPromotion/reserve-tbaa.ll | 37 +- .../ArgumentPromotion/reserve-tbaa.ll | 52 +- .../Attributor/value-simplify-pointer-info.ll | 1884 +++++++------- .../Transforms/GVN/PRE/load-pre-nonlocal.ll | 91 +- llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll | 14 +- llvm/test/Transforms/GVN/pr33549.ll | 45 +- llvm/test/Transforms/GVN/pr64598.ll | 106 +- llvm/test/Transforms/GVN/tbaa.ll | 80 +- llvm/test/Transforms/GVNHoist/hoist-md.ll | 108 +- .../AMDGPU/mem-intrinsics.ll | 24 +- .../InstCombine/alloca-cast-debuginfo.ll | 11 +- .../InstCombine/load-no-aliasing.ll | 17 +- .../InstCombine/loadstore-metadata.ll | 178 +- .../masked_intrinsics_keep_metadata.ll | 28 +- 
.../InstCombine/struct-assign-tbaa.ll | 42 +- .../Transforms/JumpThreading/ddt-crash3.ll | 31 +- .../Transforms/JumpThreading/thread-loads.ll | 350 +-- .../2011-04-06-PromoteResultOfPromotion.ll | 32 +- llvm/test/Transforms/LICM/pr50367.ll | 66 +- llvm/test/Transforms/LICM/scalar-promote.ll | 545 ++-- llvm/test/Transforms/LICM/variant-aainfo.ll | 28 +- .../test/Transforms/LoopIdiom/memmove-tbaa.ll | 70 +- .../LoopUnrollAndJam/unroll-and-jam.ll | 1402 +++++----- .../LoopVectorize/X86/cost-model-assert.ll | 38 +- .../Transforms/LoopVectorize/X86/pr54634.ll | 119 +- .../LoopVectorize/X86/strided_load_cost.ll | 439 ++-- .../constantfolder-infer-correct-gepty.ll | 10 +- .../test/Transforms/LoopVectorize/metadata.ll | 56 +- .../LoopVersioning/add-phi-update-users.ll | 77 +- .../preserve-store-metadata.ll | 17 +- .../test/Transforms/NewGVN/memory-handling.ll | 150 +- llvm/test/Transforms/NewGVN/pr31501.ll | 57 +- llvm/test/Transforms/NewGVN/pr33305.ll | 105 +- llvm/test/Transforms/NewGVN/pr33367.ll | 66 +- llvm/test/Transforms/NewGVN/pr34452.ll | 23 +- ...rve-metadata-for-predicate-replacements.ll | 38 +- llvm/test/Transforms/NewGVN/tbaa.ll | 28 +- .../Transforms/NewGVN/volatile-nonvolatile.ll | 34 +- llvm/test/Transforms/OpenMP/dead_use.ll | 24 +- .../Transforms/OpenMP/global_constructor.ll | 42 +- llvm/test/Transforms/OpenMP/spmdization.ll | 2310 ++++++++--------- .../Transforms/OpenMP/spmdization_assumes.ll | 52 +- .../Transforms/OpenMP/spmdization_indirect.ll | 690 ++--- .../PhaseOrdering/AArch64/slpordering.ll | 45 +- .../PhaseOrdering/AArch64/udotabd.ll | 134 +- .../PhaseOrdering/SystemZ/sub-xor.ll | 91 +- .../X86/SROA-after-final-loop-unrolling-2.ll | 39 +- .../X86/hoist-load-of-baseptr.ll | 179 +- .../X86/preserve-access-group.ll | 26 +- .../PhaseOrdering/X86/speculation-vs-tbaa.ll | 97 +- .../PhaseOrdering/X86/spurious-peeling.ll | 105 +- .../PhaseOrdering/X86/vdiv-nounroll.ll | 36 +- .../test/Transforms/PhaseOrdering/X86/vdiv.ll | 164 +- 
.../PhaseOrdering/loop-access-checks.ll | 106 +- .../X86/memset-pattern.ll | 6 +- .../SLPVectorizer/AArch64/32-bit.ll | 20 +- .../SLPVectorizer/AArch64/spillcost-di.ll | 17 +- .../SLPVectorizer/AArch64/store-ptr.ll | 43 +- .../SystemZ/vec-elt-insertion.ll | 10 +- .../X86/crash_scheduling-inseltpoison.ll | 39 +- .../SLPVectorizer/X86/crash_scheduling.ll | 39 +- .../Transforms/SLPVectorizer/X86/metadata.ll | 38 +- .../Transforms/SLPVectorizer/X86/pr16899.ll | 31 +- .../Transforms/SLPVectorizer/X86/pr40522.ll | 66 +- .../Transforms/SLPVectorizer/X86/pr46983.ll | 38 +- .../SLPVectorizer/X86/pr47629-inseltpoison.ll | 457 ++-- .../Transforms/SLPVectorizer/X86/pr47629.ll | 457 ++-- .../Transforms/SLPVectorizer/X86/pr49933.ll | 14 +- .../SLPVectorizer/X86/remark_listcost.ll | 36 +- llvm/test/Transforms/SROA/tbaa-struct2.ll | 13 +- llvm/test/Transforms/SROA/tbaa-struct3.ll | 80 +- llvm/test/Transforms/SROA/tbaa-subload.ll | 20 +- .../Scalarizer/basic-inseltpoison.ll | 274 +- llvm/test/Transforms/Scalarizer/basic.ll | 281 +- 85 files changed, 6995 insertions(+), 6227 deletions(-) diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll index 50ea1913b0c76..5f04f12777bd8 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll @@ -1,14 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -aa-pipeline=tbaa,basic-aa -passes=dse -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; DSE should make use of TBAA. 
define i8 @test0_yes(ptr %a, ptr %b) nounwind { -; CHECK-LABEL: define i8 @test0_yes -; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[TBAA3:![0-9]+]] +; CHECK-LABEL: define i8 @test0_yes( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[BAR_TBAA0:![0-9]+]] +; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[FOO_TBAA3:![0-9]+]] ; CHECK-NEXT: ret i8 [[Y]] ; store i8 0, ptr %a, !tbaa !1 @@ -18,11 +18,11 @@ define i8 @test0_yes(ptr %a, ptr %b) nounwind { } define i8 @test0_no(ptr %a, ptr %b) nounwind { -; CHECK-LABEL: define i8 @test0_no -; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i8 0, ptr [[A]], align 1, !tbaa [[TBAA3]] -; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA5:![0-9]+]] -; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[TBAA3]] +; CHECK-LABEL: define i8 @test0_no( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i8 0, ptr [[A]], align 1, !tbaa [[FOO_TBAA3]] +; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[BAR_TBAA5:![0-9]+]] +; CHECK-NEXT: store i8 1, ptr [[A]], align 1, !tbaa [[FOO_TBAA3]] ; CHECK-NEXT: ret i8 [[Y]] ; store i8 0, ptr %a, !tbaa !3 @@ -32,9 +32,9 @@ define i8 @test0_no(ptr %a, ptr %b) nounwind { } define i8 @test1_yes(ptr %a, ptr %b) nounwind { -; CHECK-LABEL: define i8 @test1_yes -; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA8:![0-9]+]] +; CHECK-LABEL: define i8 @test1_yes( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[QUX_TBAA8:![0-9]+]] ; CHECK-NEXT: store i8 1, ptr [[A]], align 1 ; CHECK-NEXT: ret i8 [[Y]] ; @@ -45,10 +45,10 @@ define i8 
@test1_yes(ptr %a, ptr %b) nounwind { } define i8 @test1_no(ptr %a, ptr %b) nounwind { -; CHECK-LABEL: define i8 @test1_no -; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-LABEL: define i8 @test1_no( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: store i8 0, ptr [[A]], align 1 -; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1, !tbaa [[QUX_TBAA10:![0-9]+]] ; CHECK-NEXT: store i8 1, ptr [[A]], align 1 ; CHECK-NEXT: ret i8 [[Y]] ; @@ -80,3 +80,16 @@ define i8 @test1_no(ptr %a, ptr %b) nounwind { !10 = !{ !"bar", !12} !11 = !{ !"qux", !0} !12 = !{!"different"} +;. +; CHECK: [[BAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"bar", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{} +; CHECK: [[FOO_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"foo", [[META2]]} +; CHECK: [[BAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"bar", [[META7:![0-9]+]]} +; CHECK: [[META7]] = !{!"different"} +; CHECK: [[QUX_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0, i1 true} +; CHECK: [[META9]] = !{!"qux", [[META2]]} +; CHECK: [[QUX_TBAA10]] = !{[[META9]], [[META9]], i64 0, i1 false} +;. 
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll index d896a1b164844..685c0159dd21d 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMDEP ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='gvn' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMSSA @@ -11,8 +11,8 @@ define void @yes(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind { ; CHECK-MEMDEP-LABEL: define void @yes( ; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]] -; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0:![0-9]+]] +; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3:![0-9]+]] ; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] ; CHECK-MEMDEP: [[IF_THEN]]: ; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4 @@ -23,11 +23,11 @@ define void @yes(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind { ; CHECK-MEMSSA-LABEL: define void @yes( ; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]] -; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, 
!tbaa [[RED_TBAA0:![0-9]+]] +; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3:![0-9]+]] ; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] ; CHECK-MEMSSA: [[IF_THEN]]: -; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA0]] +; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[RED_TBAA0]] ; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4 ; CHECK-MEMSSA-NEXT: ret void ; CHECK-MEMSSA: [[IF_ELSE]]: @@ -56,15 +56,15 @@ define void @watch_out_for_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind ; CHECK-LABEL: define void @watch_out_for_type_change( ; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0:![0-9]+]] +; CHECK-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3:![0-9]+]] ; CHECK-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[OUTER_SPACE_TBAA5:![0-9]+]] ; CHECK-NEXT: store i32 [[T]], ptr [[Q]], align 4 ; CHECK-NEXT: ret void ; CHECK: [[IF_ELSE]]: -; CHECK-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[BRICK_RED_TBAA8:![0-9]+]] ; CHECK-NEXT: store i32 [[U]], ptr [[Q]], align 4 ; CHECK-NEXT: ret void ; @@ -91,29 +91,29 @@ define void @watch_out_for_another_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) n ; CHECK-MEMDEP-LABEL: define void @watch_out_for_another_type_change( ; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] { ; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]] -; 
CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]] -; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]] +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0]] +; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3]] ; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] ; CHECK-MEMDEP: [[IF_THEN]]: ; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4 ; CHECK-MEMDEP-NEXT: ret void ; CHECK-MEMDEP: [[IF_ELSE]]: -; CHECK-MEMDEP-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]] +; CHECK-MEMDEP-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[OUTER_SPACE_TBAA5]] ; CHECK-MEMDEP-NEXT: store i32 [[U]], ptr [[Q]], align 4 ; CHECK-MEMDEP-NEXT: ret void ; ; CHECK-MEMSSA-LABEL: define void @watch_out_for_another_type_change( ; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] { ; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]] -; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]] -; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]] +; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[RED_TBAA0]] +; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[BLU_TBAA3]] ; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] ; CHECK-MEMSSA: [[IF_THEN]]: -; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8]] +; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[BRICK_RED_TBAA8]] ; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4 ; CHECK-MEMSSA-NEXT: ret void ; CHECK-MEMSSA: [[IF_ELSE]]: -; CHECK-MEMSSA-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]] +; CHECK-MEMSSA-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[OUTER_SPACE_TBAA5]] ; CHECK-MEMSSA-NEXT: store i32 [[U]], ptr [[Q]], align 4 ; CHECK-MEMSSA-NEXT: ret void ; @@ -144,25 +144,25 @@ if.else: !8 = !{!"brick red", !5} !9 = !{!"observable 
universe"} ;. -; CHECK-MEMDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK-MEMDEP: [[RED_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK-MEMDEP: [[META1]] = !{!"red", [[META2:![0-9]+]]} ; CHECK-MEMDEP: [[META2]] = !{} -; CHECK-MEMDEP: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK-MEMDEP: [[BLU_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} ; CHECK-MEMDEP: [[META4]] = !{!"blu", [[META2]]} -; CHECK-MEMDEP: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK-MEMDEP: [[OUTER_SPACE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} ; CHECK-MEMDEP: [[META6]] = !{!"outer space", [[META7:![0-9]+]]} ; CHECK-MEMDEP: [[META7]] = !{!"observable universe"} -; CHECK-MEMDEP: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; CHECK-MEMDEP: [[BRICK_RED_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} ; CHECK-MEMDEP: [[META9]] = !{!"brick red", [[META1]]} ;. -; CHECK-MEMSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK-MEMSSA: [[RED_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK-MEMSSA: [[META1]] = !{!"red", [[META2:![0-9]+]]} ; CHECK-MEMSSA: [[META2]] = !{} -; CHECK-MEMSSA: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK-MEMSSA: [[BLU_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} ; CHECK-MEMSSA: [[META4]] = !{!"blu", [[META2]]} -; CHECK-MEMSSA: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK-MEMSSA: [[OUTER_SPACE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} ; CHECK-MEMSSA: [[META6]] = !{!"outer space", [[META7:![0-9]+]]} ; CHECK-MEMSSA: [[META7]] = !{!"observable universe"} -; CHECK-MEMSSA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; CHECK-MEMSSA: [[BRICK_RED_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} ; CHECK-MEMSSA: [[META9]] = !{!"brick red", [[META1]]} ;. 
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll index 47dd886bb9f17..f605b516e019e 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -aa-pipeline=tbaa,basic-aa -passes=memcpyopt,instcombine < %s | FileCheck %s target datalayout = "e-p:64:64:64" @@ -7,10 +7,12 @@ target datalayout = "e-p:64:64:64" ; it has a TBAA tag which declares that it is unrelated. define void @foo(ptr nocapture %p, ptr nocapture %q, ptr nocapture %s) nounwind { -; CHECK: @foo -; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) %p, ptr noundef nonnull align 1 dereferenceable(16) %q, i64 16, i1 false), !tbaa !0 -; CHECK-NEXT: store i8 2, ptr %s, align 1, !tbaa [[TAGA:!.*]] -; CHECK-NEXT: ret void +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ptr captures(none) [[P:%.*]], ptr captures(none) [[Q:%.*]], ptr captures(none) [[S:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[P]], ptr noundef nonnull align 1 dereferenceable(16) [[Q]], i64 16, i1 false), !tbaa [[B_TBAA0:![0-9]+]] +; CHECK-NEXT: store i8 2, ptr [[S]], align 1, !tbaa [[A_TBAA3:![0-9]+]] +; CHECK-NEXT: ret void +; tail call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 16, i1 false), !tbaa !2 store i8 2, ptr %s, align 1, !tbaa !1 tail call void @llvm.memcpy.p0.p0.i64(ptr %q, ptr %p, i64 16, i1 false), !tbaa !2 @@ -19,10 +21,15 @@ define void @foo(ptr nocapture %p, ptr nocapture %q, ptr nocapture %s) nounwind declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind -; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0} -; 
CHECK: [[TYPEA]] = !{!"A", !{{.*}}} !0 = !{!"tbaa root"} !1 = !{!3, !3, i64 0} !2 = !{!4, !4, i64 0} !3 = !{!"A", !0} !4 = !{!"B", !0} +;. +; CHECK: [[B_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"B", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"tbaa root"} +; CHECK: [[A_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"A", [[META2]]} +;. diff --git a/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll b/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll index 0bcdfed808814..a4667ab62f789 100644 --- a/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll +++ b/llvm/test/Bitcode/upgrade-masked-keep-metadata.ll @@ -1,9 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S < %s | FileCheck %s define <4 x i32> @load(ptr nocapture readonly %a0) !dbg !8 { -; CHECK-LABEL: @load( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 16, <4 x i1> , <4 x i32> undef), !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA20:![0-9]+]] +; CHECK-LABEL: define <4 x i32> @load( +; CHECK-SAME: ptr readonly captures(none) [[A0:%.*]]) !dbg [[DBG8:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0]], i32 16, <4 x i1> , <4 x i32> undef), !dbg [[DBG19:![0-9]+]], !tbaa [[CHAR_TBAA20:![0-9]+]] ; CHECK-NEXT: ret <4 x i32> [[V0]], !dbg [[DBG23:![0-9]+]] ; entry: @@ -12,9 +13,10 @@ entry: } define void @store(<4 x i32> %a0, ptr nocapture %a1) !dbg !24 { -; CHECK-LABEL: @store( -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 16, <4 x i1> ), !dbg [[DBG30:![0-9]+]], !tbaa [[TBAA20]] +; CHECK-LABEL: define void @store( +; CHECK-SAME: <4 x i32> [[A0:%.*]], ptr captures(none) [[A1:%.*]]) !dbg [[DBG24:![0-9]+]] { +; CHECK-NEXT: 
[[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0]], ptr [[A1]], i32 16, <4 x i1> ), !dbg [[DBG30:![0-9]+]], !tbaa [[CHAR_TBAA20]] ; CHECK-NEXT: ret void, !dbg [[DBG31:![0-9]+]] ; entry: @@ -23,9 +25,10 @@ entry: } define <4 x i32> @gather(<4 x ptr> %a0) !dbg !32 { -; CHECK-LABEL: @gather( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[A0:%.*]], i32 16, <4 x i1> , <4 x i32> undef), !dbg [[DBG35:![0-9]+]], !tbaa [[TBAA20]] +; CHECK-LABEL: define <4 x i32> @gather( +; CHECK-SAME: <4 x ptr> [[A0:%.*]]) !dbg [[DBG32:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[A0]], i32 16, <4 x i1> , <4 x i32> undef), !dbg [[DBG35:![0-9]+]], !tbaa [[CHAR_TBAA20]] ; CHECK-NEXT: ret <4 x i32> [[V0]], !dbg [[DBG36:![0-9]+]] ; entry: @@ -34,9 +37,10 @@ entry: } define void @scatter(<4 x i32> %a0, <4 x ptr> %a1) !dbg !37 { -; CHECK-LABEL: @scatter( -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[A0:%.*]], <4 x ptr> [[A1:%.*]], i32 16, <4 x i1> ), !dbg [[DBG41:![0-9]+]], !tbaa [[TBAA20]] +; CHECK-LABEL: define void @scatter( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x ptr> [[A1:%.*]]) !dbg [[DBG37:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[A0]], <4 x ptr> [[A1]], i32 16, <4 x i1> ), !dbg [[DBG41:![0-9]+]], !tbaa [[CHAR_TBAA20]] ; CHECK-NEXT: ret void, !dbg [[DBG42:![0-9]+]] ; entry: @@ -45,9 +49,10 @@ entry: } define <4 x i32> @expandload(ptr nocapture readonly %a0) !dbg !43 { -; CHECK-LABEL: @expandload( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.expandload.v4i32(ptr [[A0:%.*]], <4 x i1> , <4 x i32> undef), !dbg [[DBG49:![0-9]+]], !tbaa [[TBAA50:![0-9]+]] +; CHECK-LABEL: define <4 x i32> @expandload( +; CHECK-SAME: ptr readonly captures(none) [[A0:%.*]]) !dbg [[DBG43:![0-9]+]] 
{ +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.expandload.v4i32(ptr [[A0]], <4 x i1> , <4 x i32> undef), !dbg [[DBG49:![0-9]+]], !tbaa [[INT_TBAA50:![0-9]+]] ; CHECK-NEXT: ret <4 x i32> [[V0]], !dbg [[DBG52:![0-9]+]] ; entry: @@ -56,9 +61,10 @@ entry: } define void @compressstore(<4 x i32> %a0, ptr nocapture %a1) !dbg !53 { -; CHECK-LABEL: @compressstore( -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.masked.compressstore.v4i32(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i1> ), !dbg [[DBG59:![0-9]+]], !tbaa [[TBAA50]] +; CHECK-LABEL: define void @compressstore( +; CHECK-SAME: <4 x i32> [[A0:%.*]], ptr captures(none) [[A1:%.*]]) !dbg [[DBG53:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.masked.compressstore.v4i32(<4 x i32> [[A0]], ptr [[A1]], <4 x i1> ), !dbg [[DBG59:![0-9]+]], !tbaa [[INT_TBAA50]] ; CHECK-NEXT: ret void, !dbg [[DBG60:![0-9]+]] ; entry: diff --git a/llvm/test/DebugInfo/unrolled-loop-remainder.ll b/llvm/test/DebugInfo/unrolled-loop-remainder.ll index f2bd855015e77..c6035ffa65e08 100644 --- a/llvm/test/DebugInfo/unrolled-loop-remainder.ll +++ b/llvm/test/DebugInfo/unrolled-loop-remainder.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=loop-unroll -unroll-runtime -unroll-allow-remainder -unroll-count=4 -unroll-remainder -S %s -o - | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -12,13 +12,14 @@ target triple = "x86_64-unknown-linux-gnu" define i32 @func_c() local_unnamed_addr #0 !dbg !14 { ; -; CHECK-LABEL: @func_c( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr @b, align 4, !dbg [[DBG17:![0-9]+]], !tbaa [[TBAA20:![0-9]+]] +; CHECK-LABEL: define i32 @func_c( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg 
[[DBG14:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr @b, align 4, !dbg [[DBG17:![0-9]+]], !tbaa [[INT_TBAA20:![0-9]+]] ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[DOTPR]], 0, !dbg [[DBG24:![0-9]+]] -; CHECK-NEXT: br i1 [[TOBOOL1]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]], !dbg [[DBG24]] -; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[A_PROMOTED:%.*]] = load ptr, ptr @a, align 8, !dbg [[DBG25:![0-9]+]], !tbaa [[TBAA26:![0-9]+]] +; CHECK-NEXT: br i1 [[TOBOOL1]], label %[[FOR_END:.*]], label %[[FOR_BODY_LR_PH:.*]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[A_PROMOTED:%.*]] = load ptr, ptr @a, align 8, !dbg [[DBG25:![0-9]+]], !tbaa [[ANYPTR_TBAA26:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -2, [[DOTPR]], !dbg [[DBG24]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], -2, !dbg [[DBG24]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[DOTPR]], [[TMP1]], !dbg [[DBG24]] @@ -26,77 +27,77 @@ define i32 @func_c() local_unnamed_addr #0 !dbg !14 { ; CHECK-NEXT: [[TMP4:%.*]] = add nuw i32 [[TMP3]], 1, !dbg [[DBG24]] ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP4]], 3, !dbg [[DBG24]] ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0, !dbg [[DBG24]] -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], !dbg [[DBG24]] -; CHECK: for.body.prol.preheader: -; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]], !dbg [[DBG24]] -; CHECK: for.body.prol: +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_PROL_PREHEADER:.*]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY_PROL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY_PROL:.*]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY_PROL]]: ; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, ptr [[A_PROMOTED]], i64 1, !dbg [[DBG28:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_PROL]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[ARRAYIDX_PROL]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]] ; CHECK-NEXT: [[CONV_PROL:%.*]] = sext i32 [[TMP5]] to i64, !dbg [[DBG28]] ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[CONV_PROL]] to ptr, !dbg [[DBG28]] ; CHECK-NEXT: [[ADD_PROL:%.*]] = add nsw i32 [[DOTPR]], 2, !dbg [[DBG29:![0-9]+]] ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]], !dbg [[DBG24]] -; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL_1:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !dbg [[DBG24]] -; CHECK: for.body.prol.1: +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[FOR_BODY_PROL_1:.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY_PROL_1]]: ; CHECK-NEXT: [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1, !dbg [[DBG28]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]] ; CHECK-NEXT: [[CONV_PROL_1:%.*]] = sext i32 [[TMP7]] to i64, !dbg [[DBG28]] ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[CONV_PROL_1]] to ptr, !dbg [[DBG28]] ; CHECK-NEXT: [[ADD_PROL_1:%.*]] = add nsw i32 [[DOTPR]], 4, !dbg [[DBG29]] ; CHECK-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]], !dbg [[DBG24]] -; CHECK-NEXT: br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY_PROL_2:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]] -; CHECK: for.body.prol.2: +; CHECK-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[FOR_BODY_PROL_2:.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY_PROL_2]]: ; CHECK-NEXT: [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1, !dbg [[DBG28]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]] ; 
CHECK-NEXT: [[CONV_PROL_2:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG28]] ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[CONV_PROL_2]] to ptr, !dbg [[DBG28]] ; CHECK-NEXT: [[ADD_PROL_2:%.*]] = add nsw i32 [[DOTPR]], 6, !dbg [[DBG29]] -; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.body.prol.loopexit.unr-lcssa: -; CHECK-NEXT: [[DOTLCSSA_UNR_PH:%.*]] = phi ptr [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP8]], [[FOR_BODY_PROL_1]] ], [ [[TMP10]], [[FOR_BODY_PROL_2]] ] -; CHECK-NEXT: [[DOTUNR_PH:%.*]] = phi ptr [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP8]], [[FOR_BODY_PROL_1]] ], [ [[TMP10]], [[FOR_BODY_PROL_2]] ] -; CHECK-NEXT: [[DOTUNR1_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[FOR_BODY_PROL]] ], [ [[ADD_PROL_1]], [[FOR_BODY_PROL_1]] ], [ [[ADD_PROL_2]], [[FOR_BODY_PROL_2]] ] -; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]], !dbg [[DBG24]] -; CHECK: for.body.prol.loopexit: -; CHECK-NEXT: [[DOTLCSSA_UNR:%.*]] = phi ptr [ poison, [[FOR_BODY_LR_PH]] ], [ [[DOTLCSSA_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[DOTUNR:%.*]] = phi ptr [ [[A_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[DOTUNR1:%.*]] = phi i32 [ [[DOTPR]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[DOTLCSSA_UNR_PH:%.*]] = phi ptr [ [[TMP6]], %[[FOR_BODY_PROL]] ], [ [[TMP8]], %[[FOR_BODY_PROL_1]] ], [ [[TMP10]], %[[FOR_BODY_PROL_2]] ] +; CHECK-NEXT: [[DOTUNR_PH:%.*]] = phi ptr [ [[TMP6]], %[[FOR_BODY_PROL]] ], [ [[TMP8]], %[[FOR_BODY_PROL_1]] ], [ [[TMP10]], %[[FOR_BODY_PROL_2]] ] +; CHECK-NEXT: [[DOTUNR1_PH:%.*]] = phi i32 [ [[ADD_PROL]], %[[FOR_BODY_PROL]] ], [ [[ADD_PROL_1]], %[[FOR_BODY_PROL_1]] ], [ [[ADD_PROL_2]], %[[FOR_BODY_PROL_2]] ] +; CHECK-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT]], !dbg [[DBG24]] +; CHECK: 
[[FOR_BODY_PROL_LOOPEXIT]]: +; CHECK-NEXT: [[DOTLCSSA_UNR:%.*]] = phi ptr [ poison, %[[FOR_BODY_LR_PH]] ], [ [[DOTLCSSA_UNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[DOTUNR:%.*]] = phi ptr [ [[A_PROMOTED]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[DOTUNR1:%.*]] = phi i32 [ [[DOTPR]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP3]], 3, !dbg [[DBG24]] -; CHECK-NEXT: br i1 [[TMP11]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]], !dbg [[DBG24]] -; CHECK: for.body.lr.ph.new: -; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg [[DBG24]] -; CHECK: for.body: -; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[DOTUNR]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ], !dbg [[DBG28]] -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[DOTUNR1]], [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br i1 [[TMP11]], label %[[FOR_COND_FOR_END_CRIT_EDGE:.*]], label %[[FOR_BODY_LR_PH_NEW:.*]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY_LR_PH_NEW]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]], !dbg [[DBG24]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[DOTUNR]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[TMP21:%.*]], %[[FOR_BODY]] ], !dbg [[DBG28]] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[DOTUNR1]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 1, !dbg [[DBG28]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]] ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG28]] ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[CONV]] to ptr, !dbg [[DBG28]] ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1, 
!dbg [[DBG28]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]] ; CHECK-NEXT: [[CONV_1:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG28]] ; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[CONV_1]] to ptr, !dbg [[DBG28]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 1, !dbg [[DBG28]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]] ; CHECK-NEXT: [[CONV_2:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG28]] ; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[CONV_2]] to ptr, !dbg [[DBG28]] ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 1, !dbg [[DBG28]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[INT_TBAA20]] ; CHECK-NEXT: [[CONV_3:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG28]] ; CHECK-NEXT: [[TMP21]] = inttoptr i64 [[CONV_3]] to ptr, !dbg [[DBG28]] ; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[TMP13]], 8, !dbg [[DBG29]] ; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[ADD_3]], 0, !dbg [[DBG24]] -; CHECK-NEXT: br i1 [[TOBOOL_3]], label [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA:%.*]], label [[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]] -; CHECK: for.cond.for.end_crit_edge.unr-lcssa: -; CHECK-NEXT: [[DOTLCSSA_PH:%.*]] = phi ptr [ [[TMP21]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_FOR_END_CRIT_EDGE]], !dbg [[DBG24]] -; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[DOTLCSSA_UNR]], [[FOR_BODY_PROL_LOOPEXIT]] ], [ [[DOTLCSSA_PH]], [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]] ], !dbg [[DBG28]] +; CHECK-NEXT: 
br i1 [[TOBOOL_3]], label %[[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA:.*]], label %[[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]]: +; CHECK-NEXT: [[DOTLCSSA_PH:%.*]] = phi ptr [ [[TMP21]], %[[FOR_BODY]] ] +; CHECK-NEXT: br label %[[FOR_COND_FOR_END_CRIT_EDGE]], !dbg [[DBG24]] +; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[DOTLCSSA_UNR]], %[[FOR_BODY_PROL_LOOPEXIT]] ], [ [[DOTLCSSA_PH]], %[[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]] ], !dbg [[DBG28]] ; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP2]], 2, !dbg [[DBG24]] -; CHECK-NEXT: store ptr [[DOTLCSSA]], ptr @a, align 8, !dbg [[DBG25]], !tbaa [[TBAA26]] -; CHECK-NEXT: store i32 [[TMP22]], ptr @b, align 4, !dbg [[DBG33:![0-9]+]], !tbaa [[TBAA20]] -; CHECK-NEXT: br label [[FOR_END]], !dbg [[DBG24]] -; CHECK: for.end: +; CHECK-NEXT: store ptr [[DOTLCSSA]], ptr @a, align 8, !dbg [[DBG25]], !tbaa [[ANYPTR_TBAA26]] +; CHECK-NEXT: store i32 [[TMP22]], ptr @b, align 4, !dbg [[DBG33:![0-9]+]], !tbaa [[INT_TBAA20]] +; CHECK-NEXT: br label %[[FOR_END]], !dbg [[DBG24]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret i32 undef, !dbg [[DBG34:![0-9]+]] ; entry: @@ -134,8 +135,9 @@ for.end: define void @func_d() local_unnamed_addr #1 !dbg !34 { ; -; CHECK-LABEL: @func_d( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @func_d( +; CHECK-SAME: ) local_unnamed_addr !dbg [[DBG35:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: ret void, !dbg [[DBG38:![0-9]+]] ; entry: diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll index 05d4d2a6551f5..48de5d1717134 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes="msan" -msan-instrumentation-with-call-threshold=0 | FileCheck %s ; ; This test illustrates a bug in MemorySanitizer that will shortly be fixed @@ -16,7 +16,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[CHAR_TBAA1:![0-9]+]] ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576), i64 35184372088832) to ptr), align 8 ; CHECK-NEXT: br label %[[FOR_COND:.*]] @@ -36,7 +36,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: [[CALL:%.*]] = tail call noundef i32 @_Z1b11__Int16x4_tS_(<4 x i16> noundef [[TMP1]], <4 x i16> noundef [[LANE]]) ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[CONV]] to ptr -; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[CHAR_TBAA1]] ; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr @@ -47,7 +47,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: store <4 x i16> [[_MSLD3]], ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[_MSLD3]] to i64 ; CHECK-NEXT: call void @__msan_maybe_store_origin_8(i64 zeroext 
[[TMP12]], ptr @_Z1cv, i32 zeroext [[TMP11]]) -; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[TBAA1]] +; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[CHAR_TBAA1]] ; CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] ; entry: @@ -76,7 +76,7 @@ attributes #0 = { mustprogress noreturn nounwind sanitize_memory "no-trapping-ma !5 = distinct !{!5, !6} !6 = !{!"llvm.loop.mustprogress"} ;. -; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[CHAR_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]} diff --git a/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll b/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll index 56cf3f528f836..84e0f7307c7ec 100644 --- a/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll +++ b/llvm/test/Instrumentation/TypeSanitizer/access-with-offset.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -passes='tysan' -S %s | FileCheck %s ;. @@ -12,8 +12,9 @@ ; CHECK: @__tysan_app_memory_mask = external global i64 ;. 
define ptr @test_load_offset(ptr %argv) { -; CHECK-LABEL: @test_load_offset( -; CHECK-NEXT: entry: +; CHECK-LABEL: define ptr @test_load_offset( +; CHECK-SAME: ptr [[ARGV:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 4 ; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 4 ; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 0, [[APP_MEM_MASK]] @@ -22,8 +23,8 @@ define ptr @test_load_offset(ptr %argv) { ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr ; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[DESC_SET:%.*]] = icmp eq ptr [[SHADOW_DESC]], null -; CHECK-NEXT: br i1 [[DESC_SET]], label [[SET_TYPE:%.*]], label [[TMP0:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK: set.type: +; CHECK-NEXT: br i1 [[DESC_SET]], label %[[SET_TYPE:.*]], label %[[BB0:.*]], !prof [[PROF0:![0-9]+]] +; CHECK: [[SET_TYPE]]: ; CHECK-NEXT: store ptr @__tysan_v1_any_20pointer_o_0, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr @@ -46,9 +47,9 @@ define ptr @test_load_offset(ptr %argv) { ; CHECK-NEXT: [[SHADOW_BYTE_7_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 56 ; CHECK-NEXT: [[SHADOW_BYTE_7_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_7_OFFSET]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -7 to ptr), ptr [[SHADOW_BYTE_7_PTR]], align 8 -; CHECK-NEXT: br label [[TMP0]] -; CHECK: 0: -; CHECK-NEXT: [[L:%.*]] = load ptr, ptr null, align 8, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: br label %[[BB0]] +; CHECK: [[BB0]]: +; CHECK-NEXT: [[L:%.*]] = load ptr, ptr null, align 8, !tbaa [[ANYPTR_TBAA1:![0-9]+]] ; CHECK-NEXT: ret ptr [[L]] ; entry: @@ -64,7 +65,7 @@ entry: ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } ;. 
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000} -; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[ANYPTR_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Instrumentation/TypeSanitizer/anon.ll b/llvm/test/Instrumentation/TypeSanitizer/anon.ll index 37de1b71e0c7e..1f0f1bd7ace15 100644 --- a/llvm/test/Instrumentation/TypeSanitizer/anon.ll +++ b/llvm/test/Instrumentation/TypeSanitizer/anon.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; Test basic type sanitizer instrumentation. ; ; RUN: opt -passes='tysan' -S %s | FileCheck %s @@ -23,22 +23,23 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @llvm.used = appending global [6 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24], section "llvm.metadata" ;. 
define void @test_anon_ns(ptr %a, ptr %b) sanitize_type { -; CHECK-LABEL: @test_anon_ns( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_anon_ns( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 ; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 -; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]] ; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 ; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]] ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr ; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24 -; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK: 0: +; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0:![0-9]+]] +; CHECK: [[BB0]]: ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null -; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]] -; CHECK: 2: +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 @@ -54,11 +55,11 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type { ; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], 
label [[TMP19:%.*]], !prof [[PROF0]] -; CHECK: 18: +; CHECK-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]] +; CHECK: [[BB18]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, i32 2) -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 19: +; CHECK-NEXT: br label %[[BB19]] +; CHECK: [[BB19]]: ; CHECK-NEXT: store ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr @@ -69,13 +70,13 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type { ; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 ; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 -; CHECK-NEXT: br label [[TMP21:%.*]] -; CHECK: 20: +; CHECK-NEXT: br label %[[BB21:.*]] +; CHECK: [[BB20]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, i32 2) -; CHECK-NEXT: br label [[TMP21]] -; CHECK: 21: -; CHECK-NEXT: br label [[TMP43:%.*]] -; CHECK: 22: +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: br label %[[BB43:.*]] +; CHECK: [[BB22]]: ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr ; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 @@ -94,26 +95,26 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type { ; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64 ; CHECK-NEXT: [[TMP39:%.*]] = icmp sge i64 [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]] -; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]] -; CHECK: 41: +; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label 
%[[BB42:.*]], !prof [[PROF0]] +; CHECK: [[BB41]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTSN12__GLOBAL____N__11zE_o_24, i32 2) -; CHECK-NEXT: br label [[TMP42]] -; CHECK: 42: -; CHECK-NEXT: br label [[TMP43]] -; CHECK: 43: -; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]] -; CHECK-NEXT: [[APP_PTR_INT1:%.*]] = ptrtoint ptr [[B:%.*]] to i64 +; CHECK-NEXT: br label %[[BB42]] +; CHECK: [[BB42]]: +; CHECK-NEXT: br label %[[BB43]] +; CHECK: [[BB43]]: +; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] +; CHECK-NEXT: [[APP_PTR_INT1:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[APP_PTR_MASKED2:%.*]] = and i64 [[APP_PTR_INT1]], [[APP_MEM_MASK]] ; CHECK-NEXT: [[APP_PTR_SHIFTED3:%.*]] = shl i64 [[APP_PTR_MASKED2]], 3 ; CHECK-NEXT: [[SHADOW_PTR_INT4:%.*]] = add i64 [[APP_PTR_SHIFTED3]], [[SHADOW_BASE]] ; CHECK-NEXT: [[SHADOW_PTR5:%.*]] = inttoptr i64 [[SHADOW_PTR_INT4]] to ptr ; CHECK-NEXT: [[SHADOW_DESC6:%.*]] = load ptr, ptr [[SHADOW_PTR5]], align 8 ; CHECK-NEXT: [[BAD_DESC7:%.*]] = icmp ne ptr [[SHADOW_DESC6]], @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24 -; CHECK-NEXT: br i1 [[BAD_DESC7]], label [[TMP44:%.*]], label [[TMP66:%.*]], !prof [[PROF0]] -; CHECK: 44: +; CHECK-NEXT: br i1 [[BAD_DESC7]], label %[[BB44:.*]], label %[[BB66:.*]], !prof [[PROF0]] +; CHECK: [[BB44]]: ; CHECK-NEXT: [[TMP45:%.*]] = icmp eq ptr [[SHADOW_DESC6]], null -; CHECK-NEXT: br i1 [[TMP45]], label [[TMP46:%.*]], label [[TMP64:%.*]] -; CHECK: 46: +; CHECK-NEXT: br i1 [[TMP45]], label %[[BB46:.*]], label %[[BB64:.*]] +; CHECK: [[BB46]]: ; CHECK-NEXT: [[TMP47:%.*]] = add i64 [[SHADOW_PTR_INT4]], 8 ; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr ; CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[TMP48]], align 8 @@ -129,11 +130,11 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type { ; CHECK-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP58]], align 8 ; CHECK-NEXT: [[TMP60:%.*]] = icmp ne ptr [[TMP59]], 
null ; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[TMP56]], [[TMP60]] -; CHECK-NEXT: br i1 [[TMP61]], label [[TMP62:%.*]], label [[TMP63:%.*]], !prof [[PROF0]] -; CHECK: 62: +; CHECK-NEXT: br i1 [[TMP61]], label %[[BB62:.*]], label %[[BB63:.*]], !prof [[PROF0]] +; CHECK: [[BB62]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[B]], i32 4, ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, i32 2) -; CHECK-NEXT: br label [[TMP63]] -; CHECK: 63: +; CHECK-NEXT: br label %[[BB63]] +; CHECK: [[BB63]]: ; CHECK-NEXT: store ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, ptr [[SHADOW_PTR5]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET8:%.*]] = add i64 [[SHADOW_PTR_INT4]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR9:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET8]] to ptr @@ -144,13 +145,13 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type { ; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET12:%.*]] = add i64 [[SHADOW_PTR_INT4]], 24 ; CHECK-NEXT: [[SHADOW_BYTE_3_PTR13:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET12]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR13]], align 8 -; CHECK-NEXT: br label [[TMP65:%.*]] -; CHECK: 64: +; CHECK-NEXT: br label %[[BB65:.*]] +; CHECK: [[BB64]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[B]], i32 4, ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, i32 2) -; CHECK-NEXT: br label [[TMP65]] -; CHECK: 65: -; CHECK-NEXT: br label [[TMP87:%.*]] -; CHECK: 66: +; CHECK-NEXT: br label %[[BB65]] +; CHECK: [[BB65]]: +; CHECK-NEXT: br label %[[BB87:.*]] +; CHECK: [[BB66]]: ; CHECK-NEXT: [[TMP67:%.*]] = add i64 [[SHADOW_PTR_INT4]], 8 ; CHECK-NEXT: [[TMP68:%.*]] = inttoptr i64 [[TMP67]] to ptr ; CHECK-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8 @@ -169,14 +170,14 @@ define void @test_anon_ns(ptr %a, ptr %b) sanitize_type { ; CHECK-NEXT: [[TMP82:%.*]] = ptrtoint ptr [[TMP81]] to i64 ; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i64 [[TMP82]], 0 ; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[TMP78]], [[TMP83]] -; CHECK-NEXT: br i1 
[[TMP84]], label [[TMP85:%.*]], label [[TMP86:%.*]], !prof [[PROF0]] -; CHECK: 85: +; CHECK-NEXT: br i1 [[TMP84]], label %[[BB85:.*]], label %[[BB86:.*]], !prof [[PROF0]] +; CHECK: [[BB85]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[B]], i32 4, ptr @__tysan_v1___ZTS1yIN12__GLOBAL____N__11zEE_o_24, i32 2) -; CHECK-NEXT: br label [[TMP86]] -; CHECK: 86: -; CHECK-NEXT: br label [[TMP87]] -; CHECK: 87: -; CHECK-NEXT: store i32 43, ptr [[B]], align 4, !tbaa [[TBAA6:![0-9]+]] +; CHECK-NEXT: br label %[[BB86]] +; CHECK: [[BB86]]: +; CHECK-NEXT: br label %[[BB87]] +; CHECK: [[BB87]]: +; CHECK-NEXT: store i32 43, ptr [[B]], align 4, !tbaa [[INT_TBAA6:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -187,22 +188,23 @@ entry: } define void @test_anon_type(ptr %a) sanitize_type { -; CHECK-LABEL: @test_anon_type( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_anon_type( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 ; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 -; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]] ; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 ; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]] ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr ; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24 -; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0]] -; CHECK: 0: +; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0]] +; CHECK: [[BB0]]: ; CHECK-NEXT: 
[[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null -; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]] -; CHECK: 2: +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 @@ -218,11 +220,11 @@ define void @test_anon_type(ptr %a) sanitize_type { ; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF0]] -; CHECK: 18: +; CHECK-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]] +; CHECK: [[BB18]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, i32 2) -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 19: +; CHECK-NEXT: br label %[[BB19]] +; CHECK: [[BB19]]: ; CHECK-NEXT: store ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr @@ -233,13 +235,13 @@ define void @test_anon_type(ptr %a) sanitize_type { ; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 ; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 -; CHECK-NEXT: br label [[TMP21:%.*]] -; CHECK: 20: +; CHECK-NEXT: br label %[[BB21:.*]] +; CHECK: [[BB20]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, i32 2) -; CHECK-NEXT: br label [[TMP21]] -; CHECK: 21: -; 
CHECK-NEXT: br label [[TMP43:%.*]] -; CHECK: 22: +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: br label %[[BB43:.*]] +; CHECK: [[BB22]]: ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr ; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 @@ -258,14 +260,14 @@ define void @test_anon_type(ptr %a) sanitize_type { ; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64 ; CHECK-NEXT: [[TMP39:%.*]] = icmp sge i64 [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]] -; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]] -; CHECK: 41: +; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42:.*]], !prof [[PROF0]] +; CHECK: [[BB41]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_____anonymous__027d9e575c5d34cb5d60d6a1d6276f95_o_24, i32 2) -; CHECK-NEXT: br label [[TMP42]] -; CHECK: 42: -; CHECK-NEXT: br label [[TMP43]] -; CHECK: 43: -; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: br label %[[BB42]] +; CHECK: [[BB42]]: +; CHECK-NEXT: br label %[[BB43]] +; CHECK: [[BB43]]: +; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA8:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -284,17 +286,17 @@ entry: !11 = !{!"", !2, i64 24} !12 = !{!11, !2, i64 24} ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type } +; CHECK: attributes #[[ATTR0]] = { sanitize_type } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } ;. 
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000} -; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META3:![0-9]+]], i64 24} +; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META3:![0-9]+]], i64 24} ; CHECK: [[META2]] = !{!"_ZTSN12_GLOBAL__N_11zE", [[META3]], i64 24} ; CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} ; CHECK: [[META5]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META3]], i64 24} +; CHECK: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META3]], i64 24} ; CHECK: [[META7]] = !{!"_ZTS1yIN12_GLOBAL__N_11zEE", [[META3]], i64 24} -; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META3]], i64 24} +; CHECK: [[INT_TBAA8]] = !{[[META9:![0-9]+]], [[META3]], i64 24} ; CHECK: [[META9]] = !{!"", [[META3]], i64 24} ;. diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll b/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll index 8ddc5738a673d..c1a452d629b7b 100644 --- a/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll +++ b/llvm/test/Instrumentation/TypeSanitizer/basic-nosan.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 6 ; Test basic type sanitizer instrumentation. ; RUN: opt -passes='tysan' -S %s | FileCheck %s @@ -31,19 +31,20 @@ entry: ; CHECK: @__tysan_shadow_memory_address = external global i64 ; CHECK: @__tysan_app_memory_mask = external global i64 ;. 
-; CHECK-LABEL: @test_load_nsan( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test_load_nsan( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 ; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 -; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]] ; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 ; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]] ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr ; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[DESC_SET:%.*]] = icmp eq ptr [[SHADOW_DESC]], null -; CHECK-NEXT: br i1 [[DESC_SET]], label [[SET_TYPE:%.*]], label [[TMP0:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK: set.type: +; CHECK-NEXT: br i1 [[DESC_SET]], label %[[SET_TYPE:.*]], label %[[BB0:.*]], !prof [[PROF0:![0-9]+]] +; CHECK: [[SET_TYPE]]: ; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr @@ -54,25 +55,26 @@ entry: ; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 ; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 -; CHECK-NEXT: br label [[TMP0]] -; CHECK: 0: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: br label %[[BB0]] +; CHECK: [[BB0]]: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] ; CHECK-NEXT: ret i32 [[TMP1]] ; ; -; CHECK-LABEL: 
@test_store_nsan( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_store_nsan( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 ; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 -; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]] ; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 ; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]] ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr ; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[DESC_SET:%.*]] = icmp eq ptr [[SHADOW_DESC]], null -; CHECK-NEXT: br i1 [[DESC_SET]], label [[SET_TYPE:%.*]], label [[TMP0:%.*]], !prof [[PROF0]] -; CHECK: set.type: +; CHECK-NEXT: br i1 [[DESC_SET]], label %[[SET_TYPE:.*]], label %[[BB0:.*]], !prof [[PROF0]] +; CHECK: [[SET_TYPE]]: ; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr @@ -83,21 +85,22 @@ entry: ; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 ; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 -; CHECK-NEXT: br label [[TMP0]] -; CHECK: 0: -; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: br label %[[BB0]] +; CHECK: [[BB0]]: +; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: @tysan.module_ctor( +; CHECK-LABEL: define internal void @tysan.module_ctor( +; 
CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: call void @__tysan_init() ; CHECK-NEXT: ret void ; ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR0]] = { nounwind } ;. ; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000} -; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C++ TBAA"} diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic.ll b/llvm/test/Instrumentation/TypeSanitizer/basic.ll index b40b64664502a..ae7ac5304dc08 100644 --- a/llvm/test/Instrumentation/TypeSanitizer/basic.ll +++ b/llvm/test/Instrumentation/TypeSanitizer/basic.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; Test basic type sanitizer instrumentation. ; ; RUN: opt -passes='tysan' -S %s | FileCheck %s @@ -21,22 +21,23 @@ declare i32 @declaration_only(i32 %a) sanitize_type ; CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_int_o_0, ptr @__tysan_v1___ZTS1x, ptr @__tysan_v1___ZTS1v, ptr @__tysan_v1___ZTS1v_o_12], section "llvm.metadata" ;. 
define i32 @test_load(ptr %a) sanitize_type { -; CHECK-LABEL: @test_load( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test_load( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 ; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 -; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]] ; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 ; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]] ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr ; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1_int_o_0 -; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK: 0: +; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0:![0-9]+]] +; CHECK: [[BB0]]: ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null -; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]] -; CHECK: 2: +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 @@ -52,11 +53,11 @@ define i32 @test_load(ptr %a) sanitize_type { ; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF0]] -; CHECK: 18: +; CHECK-NEXT: br i1 
[[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]] +; CHECK: [[BB18]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1) -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 19: +; CHECK-NEXT: br label %[[BB19]] +; CHECK: [[BB19]]: ; CHECK-NEXT: store ptr @__tysan_v1_int_o_0, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr @@ -67,13 +68,13 @@ define i32 @test_load(ptr %a) sanitize_type { ; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 ; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 -; CHECK-NEXT: br label [[TMP21:%.*]] -; CHECK: 20: +; CHECK-NEXT: br label %[[BB21:.*]] +; CHECK: [[BB20]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1) -; CHECK-NEXT: br label [[TMP21]] -; CHECK: 21: -; CHECK-NEXT: br label [[TMP43:%.*]] -; CHECK: 22: +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: br label %[[BB43:.*]] +; CHECK: [[BB22]]: ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr ; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 @@ -92,14 +93,14 @@ define i32 @test_load(ptr %a) sanitize_type { ; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64 ; CHECK-NEXT: [[TMP39:%.*]] = icmp sge i64 [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]] -; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]] -; CHECK: 41: +; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42:.*]], !prof [[PROF0]] +; CHECK: [[BB41]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1_int_o_0, i32 1) -; CHECK-NEXT: br label [[TMP42]] -; 
CHECK: 42: -; CHECK-NEXT: br label [[TMP43]] -; CHECK: 43: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: br label %[[BB42]] +; CHECK: [[BB42]]: +; CHECK-NEXT: br label %[[BB43]] +; CHECK: [[BB43]]: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] ; CHECK-NEXT: ret i32 [[TMP1]] ; entry: @@ -108,22 +109,23 @@ entry: } define void @test_store(ptr %a) sanitize_type { -; CHECK-LABEL: @test_store( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_store( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[APP_MEM_MASK:%.*]] = load i64, ptr @__tysan_app_memory_mask, align 8 ; CHECK-NEXT: [[SHADOW_BASE:%.*]] = load i64, ptr @__tysan_shadow_memory_address, align 8 -; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[APP_PTR_INT:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[APP_PTR_MASKED:%.*]] = and i64 [[APP_PTR_INT]], [[APP_MEM_MASK]] ; CHECK-NEXT: [[APP_PTR_SHIFTED:%.*]] = shl i64 [[APP_PTR_MASKED]], 3 ; CHECK-NEXT: [[SHADOW_PTR_INT:%.*]] = add i64 [[APP_PTR_SHIFTED]], [[SHADOW_BASE]] ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_PTR_INT]] to ptr ; CHECK-NEXT: [[SHADOW_DESC:%.*]] = load ptr, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[BAD_DESC:%.*]] = icmp ne ptr [[SHADOW_DESC]], @__tysan_v1___ZTS1v_o_12 -; CHECK-NEXT: br i1 [[BAD_DESC]], label [[TMP0:%.*]], label [[TMP22:%.*]], !prof [[PROF0]] -; CHECK: 0: +; CHECK-NEXT: br i1 [[BAD_DESC]], label %[[BB0:.*]], label %[[BB22:.*]], !prof [[PROF0]] +; CHECK: [[BB0]]: ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[SHADOW_DESC]], null -; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]] -; CHECK: 2: +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB20:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[TMP5:%.*]] = load 
ptr, ptr [[TMP4]], align 8 @@ -139,11 +141,11 @@ define void @test_store(ptr %a) sanitize_type { ; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ne ptr [[TMP15]], null ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF0]] -; CHECK: 18: +; CHECK-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF0]] +; CHECK: [[BB18]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2) -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 19: +; CHECK-NEXT: br label %[[BB19]] +; CHECK: [[BB19]]: ; CHECK-NEXT: store ptr @__tysan_v1___ZTS1v_o_12, ptr [[SHADOW_PTR]], align 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[SHADOW_BYTE_1_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_1_OFFSET]] to ptr @@ -154,13 +156,13 @@ define void @test_store(ptr %a) sanitize_type { ; CHECK-NEXT: [[SHADOW_BYTE_3_OFFSET:%.*]] = add i64 [[SHADOW_PTR_INT]], 24 ; CHECK-NEXT: [[SHADOW_BYTE_3_PTR:%.*]] = inttoptr i64 [[SHADOW_BYTE_3_OFFSET]] to ptr ; CHECK-NEXT: store ptr inttoptr (i64 -3 to ptr), ptr [[SHADOW_BYTE_3_PTR]], align 8 -; CHECK-NEXT: br label [[TMP21:%.*]] -; CHECK: 20: +; CHECK-NEXT: br label %[[BB21:.*]] +; CHECK: [[BB20]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2) -; CHECK-NEXT: br label [[TMP21]] -; CHECK: 21: -; CHECK-NEXT: br label [[TMP43:%.*]] -; CHECK: 22: +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: br label %[[BB43:.*]] +; CHECK: [[BB22]]: ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[SHADOW_PTR_INT]], 8 ; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr ; CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 @@ -179,14 +181,14 @@ define void @test_store(ptr %a) sanitize_type { ; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64 ; CHECK-NEXT: [[TMP39:%.*]] = 
icmp sge i64 [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP34]], [[TMP39]] -; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP42:%.*]], !prof [[PROF0]] -; CHECK: 41: +; CHECK-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42:.*]], !prof [[PROF0]] +; CHECK: [[BB41]]: ; CHECK-NEXT: call void @__tysan_check(ptr [[A]], i32 4, ptr @__tysan_v1___ZTS1v_o_12, i32 2) -; CHECK-NEXT: br label [[TMP42]] -; CHECK: 42: -; CHECK-NEXT: br label [[TMP43]] -; CHECK: 43: -; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: br label %[[BB42]] +; CHECK: [[BB42]]: +; CHECK-NEXT: br label %[[BB43]] +; CHECK: [[BB43]]: +; CHECK-NEXT: store i32 42, ptr [[A]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -202,15 +204,15 @@ entry: !5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16} !6 = !{!5, !2, i64 12} ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type } +; CHECK: attributes #[[ATTR0]] = { sanitize_type } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } ;. ; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 100000} -; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META2]], i64 12} +; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META2]], i64 12} ; CHECK: [[META6]] = !{!"_ZTS1v", [[META2]], i64 8, [[META2]], i64 12, [[META7:![0-9]+]], i64 16} ; CHECK: [[META7]] = !{!"_ZTS1x", [[META2]], i64 0, [[META2]], i64 4} ;. 
diff --git a/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll b/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll index c7c153e140fc2..d0ae3bcb435ba 100644 --- a/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll +++ b/llvm/test/Instrumentation/TypeSanitizer/nosanitize.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; Test basic type sanitizer instrumentation. ; ; RUN: opt -passes='tysan' -S %s | FileCheck %s @@ -10,9 +10,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @tysan.module_ctor, ptr null }] ;. define i32 @test_load(ptr %a) sanitize_type { -; CHECK-LABEL: @test_load( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]], !nosanitize [[META4:![0-9]+]] +; CHECK-LABEL: define i32 @test_load( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !tbaa [[INT_TBAA0:![0-9]+]], !nosanitize [[META4:![0-9]+]] ; CHECK-NEXT: ret i32 [[TMP1]] ; entry: @@ -28,10 +29,10 @@ entry: !5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16} !6 = !{!5, !2, i64 12} ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { sanitize_type } +; CHECK: attributes #[[ATTR0]] = { sanitize_type } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } ;. 
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} diff --git a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll index f60dd48a464d2..a18c3bad12fcf 100644 --- a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll +++ b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 6 ; RUN: opt < %s -passes=argpromotion -S | FileCheck %s ; PR17906 @@ -14,12 +14,12 @@ @d = global i8 0, align 1 define internal fastcc void @fn(ptr nocapture readonly %p1, ptr nocapture readonly %p2) { -; CHECK-LABEL: define {{[^@]+}}@fn -; CHECK-SAME: (i32 [[P1_0_VAL:%.*]], i64 [[P2_0_VAL:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define internal fastcc void @fn( +; CHECK-SAME: i32 [[P1_0_VAL:%.*]], i64 [[P2_0_VAL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[P2_0_VAL]] to i32 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[P1_0_VAL]] to i8 -; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[CHAR_TBAA0:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -32,14 +32,14 @@ entry: } define i32 @main() { -; CHECK-LABEL: define {{[^@]+}}@main() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4, 
!tbaa [[TBAA5:![0-9]+]] -; CHECK-NEXT: [[G_VAL:%.*]] = load i32, ptr @g, align 4, !tbaa [[TBAA5]] -; CHECK-NEXT: [[C_VAL:%.*]] = load i64, ptr @c, align 8, !tbaa [[TBAA7:![0-9]+]] +; CHECK-LABEL: define i32 @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]] +; CHECK-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[ANYPTR_TBAA3]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA3]] +; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, ptr @g, align 4, !tbaa [[INT_TBAA5]] +; CHECK-NEXT: [[C_VAL:%.*]] = load i64, ptr @c, align 8, !tbaa [[LONG_TBAA7:![0-9]+]] ; CHECK-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]]) ; CHECK-NEXT: ret i32 0 ; @@ -63,3 +63,14 @@ entry: !8 = !{!9, !9, i64 0} !9 = !{!"any pointer", !3, i64 0} +;. +; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[ANYPTR_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"any pointer", [[META1]], i64 0} +; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"int", [[META1]], i64 0} +; CHECK: [[LONG_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[META8]] = !{!"long", [[META1]], i64 0} +;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll index bed038968a527..c27f827fc941e 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC @@ -23,12 +23,12 @@ ;. define internal fastcc void @fn(ptr nocapture readonly %p1, ptr nocapture readonly %p2) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: none) -; CHECK-LABEL: define {{[^@]+}}@fn -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @g, align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-LABEL: define internal fastcc void @fn( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @g, align 4, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8 -; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: store i8 [[CONV1]], ptr @d, align 1, !tbaa [[CHAR_TBAA4:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -42,24 +42,24 @@ entry: define i32 @main() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@main -; TUNIT-SAME: () 
#[[ATTR1:[0-9]+]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA5:![0-9]+]] -; TUNIT-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[TBAA5]] -; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA5]] -; TUNIT-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[TBAA0]] +; TUNIT-LABEL: define noundef i32 @main( +; TUNIT-SAME: ) #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT: [[ENTRY:.*:]] +; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]] +; TUNIT-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[ANYPTR_TBAA5]] +; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA5]] +; TUNIT-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]] ; TUNIT-NEXT: call fastcc void @fn() #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret i32 0 ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@main -; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA5:![0-9]+]] -; CGSCC-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[TBAA5]] -; CGSCC-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA5]] -; CGSCC-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[TBAA0]] +; CGSCC-LABEL: define noundef i32 @main( +; CGSCC-SAME: ) #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*:]] +; CGSCC-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]] +; CGSCC-NEXT: store ptr @g, ptr [[TMP0]], align 8, !tbaa [[ANYPTR_TBAA5]] +; CGSCC-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA5]] +; CGSCC-NEXT: store i32 1, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]] ; CGSCC-NEXT: call fastcc void @fn() #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: ret i32 0 ; @@ -92,19 +92,19 @@ entry: ; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR2]] = { nofree nounwind willreturn } ;. 
-; TUNIT: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; TUNIT: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; TUNIT: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} ; TUNIT: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; TUNIT: [[META3]] = !{!"Simple C/C++ TBAA"} -; TUNIT: [[TBAA4]] = !{[[META2]], [[META2]], i64 0} -; TUNIT: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; TUNIT: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0} +; TUNIT: [[ANYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} ; TUNIT: [[META6]] = !{!"any pointer", [[META2]], i64 0} ;. -; CGSCC: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CGSCC: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CGSCC: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} ; CGSCC: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CGSCC: [[META3]] = !{!"Simple C/C++ TBAA"} -; CGSCC: [[TBAA4]] = !{[[META2]], [[META2]], i64 0} -; CGSCC: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CGSCC: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0} +; CGSCC: [[ANYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} ; CGSCC: [[META6]] = !{!"any pointer", [[META2]], i64 0} ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index 82bed0f27c046..3e07fe42261e9 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; @@ -66,10 +66,10 @@ ;. define void @write_arg(ptr %p, i32 %v) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) -; CHECK-LABEL: define {{[^@]+}}@write_arg -; CHECK-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-LABEL: define void @write_arg( +; CHECK-SAME: ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -78,11 +78,11 @@ entry: } define void @write_random(ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@write_random -; CHECK-SAME: (ptr nofree writeonly captures(none) [[P:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @write_random( +; CHECK-SAME: ptr nofree writeonly 
captures(none) [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @random() -; CHECK-NEXT: store i32 [[CALL]], ptr [[P]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: store i32 [[CALL]], ptr [[P]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: ret void ; entry: @@ -112,9 +112,9 @@ declare i32 @random(...) ; } define void @local_alloca_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1 -; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define void @local_alloca_simplifiable_1( +; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR17:[0-9]+]] ; TUNIT-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 @@ -126,65 +126,65 @@ define void @local_alloca_simplifiable_1(ptr noalias sret(%struct.S) align 4 %ag ; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR18]] ; TUNIT-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3 -; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]] +; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7:![0-9]+]] ; TUNIT-NEXT: [[F24:%.*]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4 -; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F24]], align 4, !tbaa [[TBAA10:![0-9]+]] +; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10:![0-9]+]] ; TUNIT-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5 -; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F37]], align 4, !tbaa [[TBAA11:![0-9]+]] -; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12:![0-9]+]] +; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11:![0-9]+]] +; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; TUNIT-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1 -; TUNIT-NEXT: store i32 4, ptr [[I212]], align 4, !tbaa [[TBAA13:![0-9]+]] +; TUNIT-NEXT: store i32 4, ptr [[I212]], align 4, !tbaa [[INT_TBAA13:![0-9]+]] ; TUNIT-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2 -; TUNIT-NEXT: store i32 4, ptr [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]] +; TUNIT-NEXT: store i32 4, ptr [[I316]], align 4, !tbaa [[INT_TBAA14:![0-9]+]] ; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR17]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) -; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1 -; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define void @local_alloca_simplifiable_1( +; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; 
CGSCC-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR20:[0-9]+]] ; CGSCC-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7:![0-9]+]] +; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7:![0-9]+]] ; CGSCC-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4 -; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 4, !tbaa [[TBAA10:![0-9]+]] +; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10:![0-9]+]] ; CGSCC-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5 -; CGSCC-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11:![0-9]+]] +; CGSCC-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11:![0-9]+]] ; CGSCC-NEXT: call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR21:[0-9]+]] ; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1 ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR21]] ; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR21]] ; CGSCC-NEXT: [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], 
ptr [[AGG_RESULT]], i64 0, i32 3 -; CGSCC-NEXT: store float [[I4]], ptr [[F12]], align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: store float [[I4]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4 -; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F23]], align 4, !tbaa [[TBAA10]] +; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F23]], align 4, !tbaa [[FLOAT_TBAA10]] ; CGSCC-NEXT: [[MUL:%.*]] = fmul float [[I5]], 2.000000e+00 ; CGSCC-NEXT: [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4 -; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]] +; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]] ; CGSCC-NEXT: [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5 -; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]] +; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]] ; CGSCC-NEXT: [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; CGSCC-NEXT: [[I7:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: [[I7:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[ADD:%.*]] = fadd float [[I6]], [[I7]] ; CGSCC-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5 -; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]] -; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[TBAA12:![0-9]+]] -; CGSCC-NEXT: store i32 [[I8]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]] +; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]] +; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] +; CGSCC-NEXT: store i32 [[I8]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]] ; CGSCC-NEXT: [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1 -; 
CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13:![0-9]+]] +; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13:![0-9]+]] ; CGSCC-NEXT: [[MUL11:%.*]] = shl nsw i32 [[I9]], 1 ; CGSCC-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1 -; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]] +; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]] ; CGSCC-NEXT: [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 -; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[I313]], align 4, !tbaa [[TBAA14:![0-9]+]] -; CGSCC-NEXT: [[I11:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[TBAA12]] +; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[I313]], align 4, !tbaa [[INT_TBAA14:![0-9]+]] +; CGSCC-NEXT: [[I11:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[INT_TBAA12]] ; CGSCC-NEXT: [[ADD15:%.*]] = add nsw i32 [[I10]], [[I11]] ; CGSCC-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2 -; CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]] +; CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]] ; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR20]] ; CGSCC-NEXT: ret void ; @@ -256,156 +256,156 @@ declare void @llvm.lifetime.end.p0(ptr nocapture) ; define void @local_alloca_simplifiable_2() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2 -; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define void @local_alloca_simplifiable_2( +; TUNIT-SAME: ) #[[ATTR3:[0-9]+]] { +; TUNIT-NEXT: [[ENTRY:.*]]: ; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) 
dereferenceable(1024) [[BYTES]]) #[[ATTR17]] -; TUNIT-NEXT: br label [[FOR_COND:%.*]] -; TUNIT: for.cond: -; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; TUNIT-NEXT: br label %[[FOR_COND:.*]] +; TUNIT: [[FOR_COND]]: +; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ] ; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100 -; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; TUNIT: for.cond.cleanup: -; TUNIT-NEXT: br label [[FOR_END:%.*]] -; TUNIT: for.body: +; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +; TUNIT: [[FOR_COND_CLEANUP]]: +; TUNIT-NEXT: br label %[[FOR_END:.*]] +; TUNIT: [[FOR_BODY]]: ; TUNIT-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10 ; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[I15]] -; TUNIT-NEXT: br label [[FOR_INC]] -; TUNIT: for.inc: +; TUNIT-NEXT: br label %[[FOR_INC]] +; TUNIT: [[FOR_INC]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] -; TUNIT: for.end: -; TUNIT-NEXT: br label [[FOR_COND2:%.*]] -; TUNIT: for.cond2: -; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ] +; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +; TUNIT: [[FOR_END]]: +; TUNIT-NEXT: br label %[[FOR_COND2:.*]] +; TUNIT: [[FOR_COND2]]: +; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ] ; TUNIT-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10 -; TUNIT-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]] -; TUNIT: for.cond.cleanup4: -; TUNIT-NEXT: br label [[FOR_END11:%.*]] -; TUNIT: for.body5: +; TUNIT-NEXT: br i1 [[EXITCOND6]], label 
%[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]] +; TUNIT: [[FOR_COND_CLEANUP4]]: +; TUNIT-NEXT: br label %[[FOR_END11:.*]] +; TUNIT: [[FOR_BODY5]]: ; TUNIT-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10 ; TUNIT-NEXT: [[I18:%.*]] = or i64 [[I17]], 1 ; TUNIT-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I18]] -; TUNIT-NEXT: br label [[FOR_INC9]] -; TUNIT: for.inc9: +; TUNIT-NEXT: br label %[[FOR_INC9]] +; TUNIT: [[FOR_INC9]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; TUNIT-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]] -; TUNIT: for.end11: -; TUNIT-NEXT: br label [[FOR_COND13:%.*]] -; TUNIT: for.cond13: -; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC22:%.*]] ], [ 0, [[FOR_END11]] ] +; TUNIT-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]] +; TUNIT: [[FOR_END11]]: +; TUNIT-NEXT: br label %[[FOR_COND13:.*]] +; TUNIT: [[FOR_COND13]]: +; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC22:.*]] ], [ 0, %[[FOR_END11]] ] ; TUNIT-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20 -; TUNIT-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]] -; TUNIT: for.cond.cleanup15: -; TUNIT-NEXT: br label [[FOR_END24:%.*]] -; TUNIT: for.body16: +; TUNIT-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]] +; TUNIT: [[FOR_COND_CLEANUP15]]: +; TUNIT-NEXT: br label %[[FOR_END24:.*]] +; TUNIT: [[FOR_BODY16]]: ; TUNIT-NEXT: [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10 ; TUNIT-NEXT: [[I21:%.*]] = add nuw nsw i64 [[I20]], 2 ; TUNIT-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I21]] -; TUNIT-NEXT: br label [[FOR_INC22]] -; TUNIT: for.inc22: +; TUNIT-NEXT: br label %[[FOR_INC22]] +; TUNIT: [[FOR_INC22]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; TUNIT-NEXT: br label [[FOR_COND13]], 
!llvm.loop [[LOOP18:![0-9]+]] -; TUNIT: for.end24: +; TUNIT-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP18:![0-9]+]] +; TUNIT: [[FOR_END24]]: ; TUNIT-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 1023 ; TUNIT-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 500 ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) [[ARRAYIDX26]], i32 noundef 0) #[[ATTR18]] -; TUNIT-NEXT: br label [[FOR_COND28:%.*]] -; TUNIT: for.cond28: -; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ] +; TUNIT-NEXT: br label %[[FOR_COND28:.*]] +; TUNIT: [[FOR_COND28]]: +; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC36:.*]] ], [ 0, %[[FOR_END24]] ] ; TUNIT-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024 -; TUNIT-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY31:%.*]], label [[FOR_COND_CLEANUP30:%.*]] -; TUNIT: for.cond.cleanup30: -; TUNIT-NEXT: br label [[FOR_END38:%.*]] -; TUNIT: for.body31: +; TUNIT-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY31:.*]], label %[[FOR_COND_CLEANUP30:.*]] +; TUNIT: [[FOR_COND_CLEANUP30]]: +; TUNIT-NEXT: br label %[[FOR_END38:.*]] +; TUNIT: [[FOR_BODY31]]: ; TUNIT-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]] -; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX35]], align 1, !tbaa [[TBAA19:![0-9]+]] -; TUNIT-NEXT: br label [[FOR_INC36]] -; TUNIT: for.inc36: +; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX35]], align 1, !tbaa [[CHAR_TBAA19:![0-9]+]] +; TUNIT-NEXT: br label %[[FOR_INC36]] +; TUNIT: [[FOR_INC36]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1 -; TUNIT-NEXT: br label [[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]] -; TUNIT: for.end38: +; TUNIT-NEXT: br label %[[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]] +; TUNIT: 
[[FOR_END38]]: ; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR17]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2 -; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define void @local_alloca_simplifiable_2( +; CGSCC-SAME: ) #[[ATTR3:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*]]: ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR20]] -; CGSCC-NEXT: br label [[FOR_COND:%.*]] -; CGSCC: for.cond: -; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CGSCC-NEXT: br label %[[FOR_COND:.*]] +; CGSCC: [[FOR_COND]]: +; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ] ; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100 -; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CGSCC: for.cond.cleanup: -; CGSCC-NEXT: br label [[FOR_END:%.*]] -; CGSCC: for.body: +; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CGSCC: [[FOR_COND_CLEANUP]]: +; CGSCC-NEXT: br label %[[FOR_END:.*]] +; CGSCC: [[FOR_BODY]]: ; CGSCC-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10 ; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[I15]] -; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA15:![0-9]+]] -; CGSCC-NEXT: br label [[FOR_INC]] -; CGSCC: for.inc: +; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[CHAR_TBAA15:![0-9]+]] +; CGSCC-NEXT: br label %[[FOR_INC]] +; CGSCC: [[FOR_INC]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 
[[INDVARS_IV]], 1 -; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] -; CGSCC: for.end: -; CGSCC-NEXT: br label [[FOR_COND2:%.*]] -; CGSCC: for.cond2: -; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ] +; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; CGSCC: [[FOR_END]]: +; CGSCC-NEXT: br label %[[FOR_COND2:.*]] +; CGSCC: [[FOR_COND2]]: +; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ] ; CGSCC-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10 -; CGSCC-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]] -; CGSCC: for.cond.cleanup4: -; CGSCC-NEXT: br label [[FOR_END11:%.*]] -; CGSCC: for.body5: +; CGSCC-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]] +; CGSCC: [[FOR_COND_CLEANUP4]]: +; CGSCC-NEXT: br label %[[FOR_END11:.*]] +; CGSCC: [[FOR_BODY5]]: ; CGSCC-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10 ; CGSCC-NEXT: [[I18:%.*]] = or i64 [[I17]], 1 ; CGSCC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I18]] -; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18:![0-9]+]] -; CGSCC-NEXT: br label [[FOR_INC9]] -; CGSCC: for.inc9: +; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18:![0-9]+]] +; CGSCC-NEXT: br label %[[FOR_INC9]] +; CGSCC: [[FOR_INC9]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CGSCC-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]] -; CGSCC: for.end11: -; CGSCC-NEXT: br label [[FOR_COND13:%.*]] -; CGSCC: for.cond13: -; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC22:%.*]] ], [ 0, [[FOR_END11]] ] +; CGSCC-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]] +; CGSCC: [[FOR_END11]]: +; CGSCC-NEXT: br label %[[FOR_COND13:.*]] +; CGSCC: 
[[FOR_COND13]]: +; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC22:.*]] ], [ 0, %[[FOR_END11]] ] ; CGSCC-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20 -; CGSCC-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]] -; CGSCC: for.cond.cleanup15: -; CGSCC-NEXT: br label [[FOR_END24:%.*]] -; CGSCC: for.body16: +; CGSCC-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]] +; CGSCC: [[FOR_COND_CLEANUP15]]: +; CGSCC-NEXT: br label %[[FOR_END24:.*]] +; CGSCC: [[FOR_BODY16]]: ; CGSCC-NEXT: [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10 ; CGSCC-NEXT: [[I21:%.*]] = add nuw nsw i64 [[I20]], 2 ; CGSCC-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I21]] -; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX21]], align 16, !tbaa [[TBAA20:![0-9]+]] -; CGSCC-NEXT: br label [[FOR_INC22]] -; CGSCC: for.inc22: +; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX21]], align 16, !tbaa [[LONG_LONG_TBAA20:![0-9]+]] +; CGSCC-NEXT: br label %[[FOR_INC22]] +; CGSCC: [[FOR_INC22]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; CGSCC-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]] -; CGSCC: for.end24: +; CGSCC-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]] +; CGSCC: [[FOR_END24]]: ; CGSCC-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 1023 -; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]] +; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX25]], align 1, !tbaa [[CHAR_TBAA15]] ; CGSCC-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 500 ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) [[ARRAYIDX26]], i32 noundef 0) #[[ATTR21]] -; CGSCC-NEXT: br label [[FOR_COND28:%.*]] -; CGSCC: for.cond28: -; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ 
[[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ] +; CGSCC-NEXT: br label %[[FOR_COND28:.*]] +; CGSCC: [[FOR_COND28]]: +; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC36:.*]] ], [ 0, %[[FOR_END24]] ] ; CGSCC-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024 -; CGSCC-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY31:%.*]], label [[FOR_COND_CLEANUP30:%.*]] -; CGSCC: for.cond.cleanup30: -; CGSCC-NEXT: br label [[FOR_END38:%.*]] -; CGSCC: for.body31: +; CGSCC-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY31:.*]], label %[[FOR_COND_CLEANUP30:.*]] +; CGSCC: [[FOR_COND_CLEANUP30]]: +; CGSCC-NEXT: br label %[[FOR_END38:.*]] +; CGSCC: [[FOR_BODY31]]: ; CGSCC-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDVARS_IV12]] -; CGSCC-NEXT: [[I23:%.*]] = load i8, ptr [[ARRAYIDX33]], align 1, !tbaa [[TBAA15]] +; CGSCC-NEXT: [[I23:%.*]] = load i8, ptr [[ARRAYIDX33]], align 1, !tbaa [[CHAR_TBAA15]] ; CGSCC-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]] -; CGSCC-NEXT: store i8 [[I23]], ptr [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]] -; CGSCC-NEXT: br label [[FOR_INC36]] -; CGSCC: for.inc36: +; CGSCC-NEXT: store i8 [[I23]], ptr [[ARRAYIDX35]], align 1, !tbaa [[CHAR_TBAA15]] +; CGSCC-NEXT: br label %[[FOR_INC36]] +; CGSCC: [[FOR_INC36]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1 -; CGSCC-NEXT: br label [[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]] -; CGSCC: for.end38: +; CGSCC-NEXT: br label %[[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]] +; CGSCC: [[FOR_END38]]: ; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR20]] ; CGSCC-NEXT: ret void ; @@ -516,10 +516,10 @@ for.end38: ; preds = %for.cond.cleanup30 ; define i32 @local_alloca_simplifiable_3() { ; CHECK: Function Attrs: mustprogress nofree 
norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_3 -; CHECK-SAME: () #[[ATTR4:[0-9]+]] { -; CHECK-NEXT: br label [[SPLIT:%.*]] -; CHECK: split: +; CHECK-LABEL: define noundef i32 @local_alloca_simplifiable_3( +; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: br label %[[SPLIT:.*]] +; CHECK: [[SPLIT]]: ; CHECK-NEXT: ret i32 2 ; %A = alloca i32, align 4 @@ -537,8 +537,8 @@ split: ; define i32 @local_alloca_simplifiable_4() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_4 -; CHECK-SAME: () #[[ATTR4]] { +; CHECK-LABEL: define i32 @local_alloca_simplifiable_4( +; CHECK-SAME: ) #[[ATTR4]] { ; CHECK-NEXT: ret i32 undef ; %A = alloca i32, align 4 @@ -554,34 +554,34 @@ define i32 @local_alloca_simplifiable_4() { ; } define i32 @multi_obj_simplifiable_1(i32 %cnd) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1 -; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define noundef i32 @multi_obj_simplifiable_1( +; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[L:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]] ; TUNIT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 -; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] -; TUNIT: cond.true: -; TUNIT-NEXT: br label [[COND_END:%.*]] -; TUNIT: cond.false: -; TUNIT-NEXT: br label [[COND_END]] -; TUNIT: cond.end: +; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]] +; TUNIT: [[COND_TRUE]]: +; TUNIT-NEXT: br label %[[COND_END:.*]] +; TUNIT: [[COND_FALSE]]: +; TUNIT-NEXT: br label %[[COND_END]] +; TUNIT: [[COND_END]]: ; 
TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]] ; TUNIT-NEXT: ret i32 5 ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1 -; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define noundef i32 @multi_obj_simplifiable_1( +; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[L:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]] ; CGSCC-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 -; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] -; CGSCC: cond.true: -; CGSCC-NEXT: br label [[COND_END:%.*]] -; CGSCC: cond.false: -; CGSCC-NEXT: br label [[COND_END]] -; CGSCC: cond.end: +; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]] +; CGSCC: [[COND_TRUE]]: +; CGSCC-NEXT: br label %[[COND_END:.*]] +; CGSCC: [[COND_FALSE]]: +; CGSCC-NEXT: br label %[[COND_END]] +; CGSCC: [[COND_END]]: ; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]] ; CGSCC-NEXT: ret i32 5 ; @@ -616,34 +616,34 @@ cond.end: ; preds = %cond.false, %cond.t ; define i32 @multi_obj_simplifiable_2(i32 %cnd) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2 -; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define i32 @multi_obj_simplifiable_2( +; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[L:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 
captures(none) dereferenceable(4) [[L]]) #[[ATTR17]] ; TUNIT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 -; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] -; TUNIT: cond.true: -; TUNIT-NEXT: br label [[COND_END:%.*]] -; TUNIT: cond.false: -; TUNIT-NEXT: br label [[COND_END]] -; TUNIT: cond.end: +; TUNIT-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]] +; TUNIT: [[COND_TRUE]]: +; TUNIT-NEXT: br label %[[COND_END:.*]] +; TUNIT: [[COND_FALSE]]: +; TUNIT-NEXT: br label %[[COND_END]] +; TUNIT: [[COND_END]]: ; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]] ; TUNIT-NEXT: ret i32 5 ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2 -; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR5]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define i32 @multi_obj_simplifiable_2( +; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR5]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[L:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]] ; CGSCC-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 -; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] -; CGSCC: cond.true: -; CGSCC-NEXT: br label [[COND_END:%.*]] -; CGSCC: cond.false: -; CGSCC-NEXT: br label [[COND_END]] -; CGSCC: cond.end: +; CGSCC-NEXT: br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]] +; CGSCC: [[COND_TRUE]]: +; CGSCC-NEXT: br label %[[COND_END:.*]] +; CGSCC: [[COND_FALSE]]: +; CGSCC-NEXT: br label %[[COND_END]] +; CGSCC: [[COND_END]]: ; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]] ; CGSCC-NEXT: ret i32 5 ; @@ -687,58 +687,58 @@ 
cond.end: ; preds = %cond.false, %cond.t ; define void @static_global_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_1 -; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define void @static_global_simplifiable_1( +; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) @Gs1, i32 noundef 1) #[[ATTR18]] ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR18]] ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR18]] ; TUNIT-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3 -; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]] +; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]] ; TUNIT-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4 -; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F2]], align 4, !tbaa [[TBAA10]] +; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10]] ; TUNIT-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5 -; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F3]], align 4, !tbaa 
[[TBAA11]] -; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]] +; TUNIT-NEXT: store float 0x40119999A0000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]] +; TUNIT-NEXT: store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]] ; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1 -; TUNIT-NEXT: store i32 4, ptr [[I2]], align 4, !tbaa [[TBAA13]] +; TUNIT-NEXT: store i32 4, ptr [[I2]], align 4, !tbaa [[INT_TBAA13]] ; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2 -; TUNIT-NEXT: store i32 4, ptr [[I3]], align 4, !tbaa [[TBAA14]] +; TUNIT-NEXT: store i32 4, ptr [[I3]], align 4, !tbaa [[INT_TBAA14]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_1 -; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]] -; CGSCC-NEXT: store float 0x40019999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]] -; CGSCC-NEXT: store float 0x400A666660000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]] +; CGSCC-LABEL: define void @static_global_simplifiable_1( +; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR3]] { +; CGSCC-NEXT: [[ENTRY:.*:]] +; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]] +; CGSCC-NEXT: store float 0x40019999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, 
!tbaa [[FLOAT_TBAA10]] +; CGSCC-NEXT: store float 0x400A666660000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[FLOAT_TBAA11]] ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) @Gs1, i32 noundef 1) #[[ATTR21]] ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR21]] ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR21]] -; CGSCC-NEXT: [[I:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: [[I:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3 -; CGSCC-NEXT: store float [[I]], ptr [[F1]], align 4, !tbaa [[TBAA7]] -; CGSCC-NEXT: [[I4:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]] +; CGSCC-NEXT: store float [[I]], ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]] +; CGSCC-NEXT: [[I4:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[FLOAT_TBAA10]] ; CGSCC-NEXT: [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00 ; CGSCC-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4 -; CGSCC-NEXT: store float [[MUL]], ptr [[F2]], align 4, !tbaa [[TBAA10]] -; CGSCC-NEXT: [[I5:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]] -; CGSCC-NEXT: [[I6:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa 
[[TBAA7]] +; CGSCC-NEXT: store float [[MUL]], ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10]] +; CGSCC-NEXT: [[I5:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[FLOAT_TBAA11]] +; CGSCC-NEXT: [[I6:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[ADD:%.*]] = fadd float [[I5]], [[I6]] ; CGSCC-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5 -; CGSCC-NEXT: store float [[ADD]], ptr [[F3]], align 4, !tbaa [[TBAA11]] -; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[TBAA12]] -; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]] -; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), align 4, !tbaa [[TBAA13]] +; CGSCC-NEXT: store float [[ADD]], ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]] +; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[INT_TBAA12]] +; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]] +; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), align 4, !tbaa [[INT_TBAA13]] ; CGSCC-NEXT: [[MUL1:%.*]] = shl nsw i32 [[I8]], 1 ; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1 -; CGSCC-NEXT: store i32 [[MUL1]], ptr [[I2]], align 4, !tbaa [[TBAA13]] -; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), align 4, !tbaa [[TBAA14]] -; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[TBAA12]] +; CGSCC-NEXT: store i32 [[MUL1]], ptr [[I2]], align 4, !tbaa [[INT_TBAA13]] +; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), align 4, !tbaa [[INT_TBAA14]] +; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[INT_TBAA12]] ; CGSCC-NEXT: [[ADD2:%.*]] = 
add nsw i32 [[I9]], [[I10]] ; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2 -; CGSCC-NEXT: store i32 [[ADD2]], ptr [[I3]], align 4, !tbaa [[TBAA14]] +; CGSCC-NEXT: store i32 [[ADD2]], ptr [[I3]], align 4, !tbaa [[INT_TBAA14]] ; CGSCC-NEXT: ret void ; entry: @@ -776,13 +776,13 @@ entry: define i32 @test_range_merge1() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@test_range_merge1 -; TUNIT-SAME: () #[[ATTR5]] { +; TUNIT-LABEL: define noundef i32 @test_range_merge1( +; TUNIT-SAME: ) #[[ATTR5]] { ; TUNIT-NEXT: ret i32 2 ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@test_range_merge1 -; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { +; CGSCC-LABEL: define noundef i32 @test_range_merge1( +; CGSCC-SAME: ) #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: ret i32 2 ; store <2 x i32> , ptr @Vs1 @@ -795,8 +795,8 @@ define i32 @test_range_merge1() { define i32 @test_range_merge2() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@test_range_merge2 -; TUNIT-SAME: () #[[ATTR3]] { +; TUNIT-LABEL: define i32 @test_range_merge2( +; TUNIT-SAME: ) #[[ATTR3]] { ; TUNIT-NEXT: store <2 x i32> , ptr @Vs2, align 8 ; TUNIT-NEXT: [[L0:%.*]] = load i32, ptr @Vs2, align 4 ; TUNIT-NEXT: [[L1:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @Vs2, i64 0, i32 1), align 4 @@ -804,8 +804,8 @@ define i32 @test_range_merge2() { ; TUNIT-NEXT: ret i32 [[ADD]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@test_range_merge2 -; CGSCC-SAME: () #[[ATTR5]] { +; CGSCC-LABEL: define i32 @test_range_merge2( +; CGSCC-SAME: ) #[[ATTR5]] { ; CGSCC-NEXT: store <2 x i32> , ptr @Vs2, align 8 ; CGSCC-NEXT: [[L0:%.*]] = load i32, ptr @Vs2, align 4 ; CGSCC-NEXT: [[L1:%.*]] = 
load i32, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @Vs2, i64 0, i32 1), align 4 @@ -837,147 +837,147 @@ define i32 @test_range_merge2() { ; define void @static_global_simplifiable_2() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_2 -; TUNIT-SAME: () #[[ATTR5]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: br label [[FOR_COND:%.*]] -; TUNIT: for.cond: -; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; TUNIT-LABEL: define void @static_global_simplifiable_2( +; TUNIT-SAME: ) #[[ATTR5]] { +; TUNIT-NEXT: [[ENTRY:.*]]: +; TUNIT-NEXT: br label %[[FOR_COND:.*]] +; TUNIT: [[FOR_COND]]: +; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ] ; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100 -; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; TUNIT: for.cond.cleanup: -; TUNIT-NEXT: br label [[FOR_END:%.*]] -; TUNIT: for.body: +; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +; TUNIT: [[FOR_COND_CLEANUP]]: +; TUNIT-NEXT: br label %[[FOR_END:.*]] +; TUNIT: [[FOR_BODY]]: ; TUNIT-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10 ; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[I]] -; TUNIT-NEXT: br label [[FOR_INC]] -; TUNIT: for.inc: +; TUNIT-NEXT: br label %[[FOR_INC]] +; TUNIT: [[FOR_INC]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] -; TUNIT: for.end: -; TUNIT-NEXT: br label [[FOR_COND2:%.*]] -; TUNIT: for.cond2: -; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ] +; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +; TUNIT: [[FOR_END]]: +; 
TUNIT-NEXT: br label %[[FOR_COND2:.*]] +; TUNIT: [[FOR_COND2]]: +; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ] ; TUNIT-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10 -; TUNIT-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]] -; TUNIT: for.cond.cleanup4: -; TUNIT-NEXT: br label [[FOR_END11:%.*]] -; TUNIT: for.body5: +; TUNIT-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]] +; TUNIT: [[FOR_COND_CLEANUP4]]: +; TUNIT-NEXT: br label %[[FOR_END11:.*]] +; TUNIT: [[FOR_BODY5]]: ; TUNIT-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10 ; TUNIT-NEXT: [[I16:%.*]] = or i64 [[I15]], 1 ; TUNIT-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr @GBytes, i64 [[I16]] -; TUNIT-NEXT: br label [[FOR_INC9]] -; TUNIT: for.inc9: +; TUNIT-NEXT: br label %[[FOR_INC9]] +; TUNIT: [[FOR_INC9]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; TUNIT-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]] -; TUNIT: for.end11: -; TUNIT-NEXT: br label [[FOR_COND13:%.*]] -; TUNIT: for.cond13: -; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ] +; TUNIT-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]] +; TUNIT: [[FOR_END11]]: +; TUNIT-NEXT: br label %[[FOR_COND13:.*]] +; TUNIT: [[FOR_COND13]]: +; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ] ; TUNIT-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20 -; TUNIT-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]] -; TUNIT: for.cond.cleanup15: -; TUNIT-NEXT: br label [[FOR_END23:%.*]] -; TUNIT: for.body16: +; TUNIT-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]] +; TUNIT: [[FOR_COND_CLEANUP15]]: +; TUNIT-NEXT: br label %[[FOR_END23:.*]] +; TUNIT: 
[[FOR_BODY16]]: ; TUNIT-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10 ; TUNIT-NEXT: [[I18:%.*]] = add nuw nsw i64 [[I17]], 2 ; TUNIT-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr @GBytes, i64 [[I18]] -; TUNIT-NEXT: br label [[FOR_INC21]] -; TUNIT: for.inc21: +; TUNIT-NEXT: br label %[[FOR_INC21]] +; TUNIT: [[FOR_INC21]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; TUNIT-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]] -; TUNIT: for.end23: +; TUNIT-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]] +; TUNIT: [[FOR_END23]]: ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 500), i32 noundef 0) #[[ATTR18]] -; TUNIT-NEXT: br label [[FOR_COND25:%.*]] -; TUNIT: for.cond25: -; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ] +; TUNIT-NEXT: br label %[[FOR_COND25:.*]] +; TUNIT: [[FOR_COND25]]: +; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC33:.*]] ], [ 0, %[[FOR_END23]] ] ; TUNIT-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024 -; TUNIT-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY28:%.*]], label [[FOR_COND_CLEANUP27:%.*]] -; TUNIT: for.cond.cleanup27: -; TUNIT-NEXT: br label [[FOR_END35:%.*]] -; TUNIT: for.body28: +; TUNIT-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY28:.*]], label %[[FOR_COND_CLEANUP27:.*]] +; TUNIT: [[FOR_COND_CLEANUP27]]: +; TUNIT-NEXT: br label %[[FOR_END35:.*]] +; TUNIT: [[FOR_BODY28]]: ; TUNIT-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]] -; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]] -; TUNIT-NEXT: br label [[FOR_INC33]] -; TUNIT: for.inc33: +; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA19]] +; TUNIT-NEXT: br label 
%[[FOR_INC33]] +; TUNIT: [[FOR_INC33]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1 -; TUNIT-NEXT: br label [[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]] -; TUNIT: for.end35: +; TUNIT-NEXT: br label %[[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]] +; TUNIT: [[FOR_END35]]: ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_2 -; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: br label [[FOR_COND:%.*]] -; CGSCC: for.cond: -; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CGSCC-LABEL: define void @static_global_simplifiable_2( +; CGSCC-SAME: ) #[[ATTR3]] { +; CGSCC-NEXT: [[ENTRY:.*]]: +; CGSCC-NEXT: br label %[[FOR_COND:.*]] +; CGSCC: [[FOR_COND]]: +; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ] ; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100 -; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CGSCC: for.cond.cleanup: -; CGSCC-NEXT: br label [[FOR_END:%.*]] -; CGSCC: for.body: +; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CGSCC: [[FOR_COND_CLEANUP]]: +; CGSCC-NEXT: br label %[[FOR_END:.*]] +; CGSCC: [[FOR_BODY]]: ; CGSCC-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10 ; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[I]] -; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA15]] -; CGSCC-NEXT: br label [[FOR_INC]] -; CGSCC: for.inc: +; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[CHAR_TBAA15]] +; CGSCC-NEXT: br label %[[FOR_INC]] +; CGSCC: [[FOR_INC]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] -; CGSCC: for.end: -; CGSCC-NEXT: 
br label [[FOR_COND2:%.*]] -; CGSCC: for.cond2: -; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ] +; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +; CGSCC: [[FOR_END]]: +; CGSCC-NEXT: br label %[[FOR_COND2:.*]] +; CGSCC: [[FOR_COND2]]: +; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ] ; CGSCC-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10 -; CGSCC-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]] -; CGSCC: for.cond.cleanup4: -; CGSCC-NEXT: br label [[FOR_END11:%.*]] -; CGSCC: for.body5: +; CGSCC-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]] +; CGSCC: [[FOR_COND_CLEANUP4]]: +; CGSCC-NEXT: br label %[[FOR_END11:.*]] +; CGSCC: [[FOR_BODY5]]: ; CGSCC-NEXT: [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10 ; CGSCC-NEXT: [[I16:%.*]] = or i64 [[I15]], 1 ; CGSCC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr @GBytes, i64 [[I16]] -; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]] -; CGSCC-NEXT: br label [[FOR_INC9]] -; CGSCC: for.inc9: +; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18]] +; CGSCC-NEXT: br label %[[FOR_INC9]] +; CGSCC: [[FOR_INC9]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CGSCC-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]] -; CGSCC: for.end11: -; CGSCC-NEXT: br label [[FOR_COND13:%.*]] -; CGSCC: for.cond13: -; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ] +; CGSCC-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]] +; CGSCC: [[FOR_END11]]: +; CGSCC-NEXT: br label %[[FOR_COND13:.*]] +; CGSCC: [[FOR_COND13]]: +; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ] ; 
CGSCC-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20 -; CGSCC-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]] -; CGSCC: for.cond.cleanup15: -; CGSCC-NEXT: br label [[FOR_END23:%.*]] -; CGSCC: for.body16: +; CGSCC-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]] +; CGSCC: [[FOR_COND_CLEANUP15]]: +; CGSCC-NEXT: br label %[[FOR_END23:.*]] +; CGSCC: [[FOR_BODY16]]: ; CGSCC-NEXT: [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10 ; CGSCC-NEXT: [[I18:%.*]] = add nuw nsw i64 [[I17]], 2 ; CGSCC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr @GBytes, i64 [[I18]] -; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 16, !tbaa [[TBAA20]] -; CGSCC-NEXT: br label [[FOR_INC21]] -; CGSCC: for.inc21: +; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 16, !tbaa [[LONG_LONG_TBAA20]] +; CGSCC-NEXT: br label %[[FOR_INC21]] +; CGSCC: [[FOR_INC21]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; CGSCC-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] -; CGSCC: for.end23: -; CGSCC-NEXT: store i8 0, ptr getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]] +; CGSCC-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] +; CGSCC: [[FOR_END23]]: +; CGSCC-NEXT: store i8 0, ptr getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 1023), align 1, !tbaa [[CHAR_TBAA15]] ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 500), i32 noundef 0) #[[ATTR21]] -; CGSCC-NEXT: br label [[FOR_COND25:%.*]] -; CGSCC: for.cond25: -; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ] +; CGSCC-NEXT: br label %[[FOR_COND25:.*]] +; CGSCC: [[FOR_COND25]]: +; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], 
%[[FOR_INC33:.*]] ], [ 0, %[[FOR_END23]] ] ; CGSCC-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024 -; CGSCC-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY28:%.*]], label [[FOR_COND_CLEANUP27:%.*]] -; CGSCC: for.cond.cleanup27: -; CGSCC-NEXT: br label [[FOR_END35:%.*]] -; CGSCC: for.body28: +; CGSCC-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY28:.*]], label %[[FOR_COND_CLEANUP27:.*]] +; CGSCC: [[FOR_COND_CLEANUP27]]: +; CGSCC-NEXT: br label %[[FOR_END35:.*]] +; CGSCC: [[FOR_BODY28]]: ; CGSCC-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[INDVARS_IV12]] -; CGSCC-NEXT: [[I19:%.*]] = load i8, ptr [[ARRAYIDX30]], align 1, !tbaa [[TBAA15]] +; CGSCC-NEXT: [[I19:%.*]] = load i8, ptr [[ARRAYIDX30]], align 1, !tbaa [[CHAR_TBAA15]] ; CGSCC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]] -; CGSCC-NEXT: store i8 [[I19]], ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]] -; CGSCC-NEXT: br label [[FOR_INC33]] -; CGSCC: for.inc33: +; CGSCC-NEXT: store i8 [[I19]], ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA15]] +; CGSCC-NEXT: br label %[[FOR_INC33]] +; CGSCC: [[FOR_INC33]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1 -; CGSCC-NEXT: br label [[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]] -; CGSCC: for.end35: +; CGSCC-NEXT: br label %[[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]] +; CGSCC: [[FOR_END35]]: ; CGSCC-NEXT: ret void ; entry: @@ -1080,15 +1080,15 @@ for.end35: ; preds = %for.cond.cleanup27 ; } define i32 @static_global_simplifiable_3() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_3 -; TUNIT-SAME: () #[[ATTR5]] { -; TUNIT-NEXT: store i32 1, ptr @Flag3, align 4, !tbaa [[TBAA3]] +; TUNIT-LABEL: define noundef i32 @static_global_simplifiable_3( +; TUNIT-SAME: ) #[[ATTR5]] { +; TUNIT-NEXT: store i32 1, ptr @Flag3, align 4, 
!tbaa [[INT_TBAA3]] ; TUNIT-NEXT: ret i32 1 ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_3 -; CGSCC-SAME: () #[[ATTR6]] { -; CGSCC-NEXT: store i32 1, ptr @Flag3, align 4, !tbaa [[TBAA3]] +; CGSCC-LABEL: define noundef i32 @static_global_simplifiable_3( +; CGSCC-SAME: ) #[[ATTR6]] { +; CGSCC-NEXT: store i32 1, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: ret i32 1 ; store i32 1, ptr @Flag3, align 4, !tbaa !3 @@ -1115,95 +1115,95 @@ define i32 @static_global_simplifiable_3() { ; define void @noalias_arg_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result, ptr byval(%struct.S) align 8 %s) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1 -; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]], ptr noalias nofree nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define void @noalias_arg_simplifiable_1( +; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]], ptr noalias nofree nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]] +; TUNIT-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]] ; TUNIT-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4 -; TUNIT-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[TBAA10]] +; TUNIT-NEXT: store float 0x40019999A0000000, ptr 
[[F2]], align 8, !tbaa [[FLOAT_TBAA10]] ; TUNIT-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5 -; TUNIT-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11]] +; TUNIT-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]] ; TUNIT-NEXT: call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR18]] ; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1 ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR18]] ; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR18]] ; TUNIT-NEXT: [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; TUNIT-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]] +; TUNIT-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]] ; TUNIT-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3 -; TUNIT-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[TBAA7]] +; TUNIT-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]] ; TUNIT-NEXT: [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4 -; TUNIT-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[TBAA10]] +; TUNIT-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[FLOAT_TBAA10]] ; TUNIT-NEXT: [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00 ; TUNIT-NEXT: [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4 -; TUNIT-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]] +; TUNIT-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa 
[[FLOAT_TBAA10]] ; TUNIT-NEXT: [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5 -; TUNIT-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]] +; TUNIT-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]] ; TUNIT-NEXT: [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; TUNIT-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]] +; TUNIT-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]] ; TUNIT-NEXT: [[ADD:%.*]] = fadd float [[I5]], [[I6]] ; TUNIT-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5 -; TUNIT-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]] -; TUNIT-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]] -; TUNIT-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]] +; TUNIT-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]] +; TUNIT-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]] +; TUNIT-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]] ; TUNIT-NEXT: [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1 -; TUNIT-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13]] +; TUNIT-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13]] ; TUNIT-NEXT: [[MUL11:%.*]] = shl nsw i32 [[I8]], 1 ; TUNIT-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1 -; TUNIT-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]] +; TUNIT-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]] ; TUNIT-NEXT: [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 -; TUNIT-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[TBAA14]] -; TUNIT-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]] +; TUNIT-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], 
align 8, !tbaa [[INT_TBAA14]] +; TUNIT-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]] ; TUNIT-NEXT: [[ADD15:%.*]] = add nsw i32 [[I9]], [[I10]] ; TUNIT-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2 -; TUNIT-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]] +; TUNIT-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) -; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1 -; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]], ptr noalias nofree noundef nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define void @noalias_arg_simplifiable_1( +; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]], ptr noalias nofree noundef nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4 -; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[TBAA10]] +; CGSCC-NEXT: store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[FLOAT_TBAA10]] ; CGSCC-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5 -; CGSCC-NEXT: store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11]] +; CGSCC-NEXT: store float 0x400A666660000000, 
ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]] ; CGSCC-NEXT: call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR21]] ; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1 ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR21]] ; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR21]] ; CGSCC-NEXT: [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; CGSCC-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3 -; CGSCC-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: store float [[I]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4 -; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[TBAA10]] +; CGSCC-NEXT: [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[FLOAT_TBAA10]] ; CGSCC-NEXT: [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00 ; CGSCC-NEXT: [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4 -; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]] +; CGSCC-NEXT: store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]] ; CGSCC-NEXT: [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5 -; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]] +; CGSCC-NEXT: [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]] ; 
CGSCC-NEXT: [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3 -; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]] +; CGSCC-NEXT: [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]] ; CGSCC-NEXT: [[ADD:%.*]] = fadd float [[I5]], [[I6]] ; CGSCC-NEXT: [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5 -; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]] -; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]] -; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]] +; CGSCC-NEXT: store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]] +; CGSCC-NEXT: [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]] +; CGSCC-NEXT: store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]] ; CGSCC-NEXT: [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1 -; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13]] +; CGSCC-NEXT: [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13]] ; CGSCC-NEXT: [[MUL11:%.*]] = shl nsw i32 [[I8]], 1 ; CGSCC-NEXT: [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1 -; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]] +; CGSCC-NEXT: store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]] ; CGSCC-NEXT: [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 -; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[TBAA14]] -; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]] +; CGSCC-NEXT: [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[INT_TBAA14]] +; CGSCC-NEXT: [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]] ; CGSCC-NEXT: [[ADD15:%.*]] = add nsw i32 [[I9]], [[I10]] ; CGSCC-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2 -; 
CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]] +; CGSCC-NEXT: store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]] ; CGSCC-NEXT: ret void ; entry: @@ -1266,157 +1266,157 @@ entry: ; define void @noalias_arg_simplifiable_2(ptr %Bytes) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_2 -; TUNIT-SAME: (ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: br label [[FOR_COND:%.*]] -; TUNIT: for.cond: -; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; TUNIT-LABEL: define void @noalias_arg_simplifiable_2( +; TUNIT-SAME: ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT: [[ENTRY:.*]]: +; TUNIT-NEXT: br label %[[FOR_COND:.*]] +; TUNIT: [[FOR_COND]]: +; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ] ; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100 -; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; TUNIT: for.cond.cleanup: -; TUNIT-NEXT: br label [[FOR_END:%.*]] -; TUNIT: for.body: +; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +; TUNIT: [[FOR_COND_CLEANUP]]: +; TUNIT-NEXT: br label %[[FOR_END:.*]] +; TUNIT: [[FOR_BODY]]: ; TUNIT-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10 ; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[I]] -; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA19]] -; TUNIT-NEXT: br label [[FOR_INC]] -; TUNIT: for.inc: +; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA19]] +; TUNIT-NEXT: br label %[[FOR_INC]] +; TUNIT: [[FOR_INC]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] -; TUNIT: 
for.end: -; TUNIT-NEXT: br label [[FOR_COND2:%.*]] -; TUNIT: for.cond2: -; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ] +; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; TUNIT: [[FOR_END]]: +; TUNIT-NEXT: br label %[[FOR_COND2:.*]] +; TUNIT: [[FOR_COND2]]: +; TUNIT-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ] ; TUNIT-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10 -; TUNIT-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]] -; TUNIT: for.cond.cleanup4: -; TUNIT-NEXT: br label [[FOR_END11:%.*]] -; TUNIT: for.body5: +; TUNIT-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]] +; TUNIT: [[FOR_COND_CLEANUP4]]: +; TUNIT-NEXT: br label %[[FOR_END11:.*]] +; TUNIT: [[FOR_BODY5]]: ; TUNIT-NEXT: [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10 ; TUNIT-NEXT: [[I17:%.*]] = or i64 [[I16]], 1 ; TUNIT-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I17]] -; TUNIT-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA26:![0-9]+]] -; TUNIT-NEXT: br label [[FOR_INC9]] -; TUNIT: for.inc9: +; TUNIT-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA26:![0-9]+]] +; TUNIT-NEXT: br label %[[FOR_INC9]] +; TUNIT: [[FOR_INC9]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; TUNIT-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]] -; TUNIT: for.end11: -; TUNIT-NEXT: br label [[FOR_COND13:%.*]] -; TUNIT: for.cond13: -; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ] +; TUNIT-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]] +; TUNIT: [[FOR_END11]]: +; TUNIT-NEXT: br label %[[FOR_COND13:.*]] +; TUNIT: [[FOR_COND13]]: +; TUNIT-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], 
%[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ] ; TUNIT-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20 -; TUNIT-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]] -; TUNIT: for.cond.cleanup15: -; TUNIT-NEXT: br label [[FOR_END23:%.*]] -; TUNIT: for.body16: +; TUNIT-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]] +; TUNIT: [[FOR_COND_CLEANUP15]]: +; TUNIT-NEXT: br label %[[FOR_END23:.*]] +; TUNIT: [[FOR_BODY16]]: ; TUNIT-NEXT: [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10 ; TUNIT-NEXT: [[I20:%.*]] = add nuw nsw i64 [[I19]], 2 ; TUNIT-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I20]] -; TUNIT-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[TBAA28:![0-9]+]] -; TUNIT-NEXT: br label [[FOR_INC21]] -; TUNIT: for.inc21: +; TUNIT-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[LONG_LONG_TBAA28:![0-9]+]] +; TUNIT-NEXT: br label %[[FOR_INC21]] +; TUNIT: [[FOR_INC21]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; TUNIT-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]] -; TUNIT: for.end23: +; TUNIT-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]] +; TUNIT: [[FOR_END23]]: ; TUNIT-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 1023 -; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[TBAA19]] +; TUNIT-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[CHAR_TBAA19]] ; TUNIT-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 500 ; TUNIT-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR18]] -; TUNIT-NEXT: br label [[FOR_COND27:%.*]] -; TUNIT: for.cond27: -; TUNIT-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ] +; TUNIT-NEXT: br label %[[FOR_COND27:.*]] +; TUNIT: [[FOR_COND27]]: +; TUNIT-NEXT: 
[[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC35:.*]] ], [ 0, %[[FOR_END23]] ] ; TUNIT-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024 -; TUNIT-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY30:%.*]], label [[FOR_COND_CLEANUP29:%.*]] -; TUNIT: for.cond.cleanup29: -; TUNIT-NEXT: br label [[FOR_END37:%.*]] -; TUNIT: for.body30: +; TUNIT-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY30:.*]], label %[[FOR_COND_CLEANUP29:.*]] +; TUNIT: [[FOR_COND_CLEANUP29]]: +; TUNIT-NEXT: br label %[[FOR_END37:.*]] +; TUNIT: [[FOR_BODY30]]: ; TUNIT-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[INDVARS_IV12]] -; TUNIT-NEXT: [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]] +; TUNIT-NEXT: [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA19]] ; TUNIT-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]] -; TUNIT-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[TBAA19]] -; TUNIT-NEXT: br label [[FOR_INC35]] -; TUNIT: for.inc35: +; TUNIT-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[CHAR_TBAA19]] +; TUNIT-NEXT: br label %[[FOR_INC35]] +; TUNIT: [[FOR_INC35]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1 -; TUNIT-NEXT: br label [[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]] -; TUNIT: for.end37: +; TUNIT-NEXT: br label %[[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]] +; TUNIT: [[FOR_END37]]: ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_2 -; CGSCC-SAME: (ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: br label [[FOR_COND:%.*]] -; CGSCC: for.cond: -; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CGSCC-LABEL: define void @noalias_arg_simplifiable_2( +; CGSCC-SAME: ptr nofree 
captures(none) [[BYTES:%.*]]) #[[ATTR3]] { +; CGSCC-NEXT: [[ENTRY:.*]]: +; CGSCC-NEXT: br label %[[FOR_COND:.*]] +; CGSCC: [[FOR_COND]]: +; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ] ; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100 -; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CGSCC: for.cond.cleanup: -; CGSCC-NEXT: br label [[FOR_END:%.*]] -; CGSCC: for.body: +; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CGSCC: [[FOR_COND_CLEANUP]]: +; CGSCC-NEXT: br label %[[FOR_END:.*]] +; CGSCC: [[FOR_BODY]]: ; CGSCC-NEXT: [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10 ; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[I]] -; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA15]] -; CGSCC-NEXT: br label [[FOR_INC]] -; CGSCC: for.inc: +; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA15]] +; CGSCC-NEXT: br label %[[FOR_INC]] +; CGSCC: [[FOR_INC]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] -; CGSCC: for.end: -; CGSCC-NEXT: br label [[FOR_COND2:%.*]] -; CGSCC: for.cond2: -; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ] +; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; CGSCC: [[FOR_END]]: +; CGSCC-NEXT: br label %[[FOR_COND2:.*]] +; CGSCC: [[FOR_COND2]]: +; CGSCC-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ] ; CGSCC-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10 -; CGSCC-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]] -; CGSCC: for.cond.cleanup4: -; CGSCC-NEXT: br label [[FOR_END11:%.*]] -; CGSCC: for.body5: +; CGSCC-NEXT: br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], 
label %[[FOR_COND_CLEANUP4:.*]] +; CGSCC: [[FOR_COND_CLEANUP4]]: +; CGSCC-NEXT: br label %[[FOR_END11:.*]] +; CGSCC: [[FOR_BODY5]]: ; CGSCC-NEXT: [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10 ; CGSCC-NEXT: [[I17:%.*]] = or i64 [[I16]], 1 ; CGSCC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I17]] -; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]] -; CGSCC-NEXT: br label [[FOR_INC9]] -; CGSCC: for.inc9: +; CGSCC-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18]] +; CGSCC-NEXT: br label %[[FOR_INC9]] +; CGSCC: [[FOR_INC9]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CGSCC-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]] -; CGSCC: for.end11: -; CGSCC-NEXT: br label [[FOR_COND13:%.*]] -; CGSCC: for.cond13: -; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ] +; CGSCC-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]] +; CGSCC: [[FOR_END11]]: +; CGSCC-NEXT: br label %[[FOR_COND13:.*]] +; CGSCC: [[FOR_COND13]]: +; CGSCC-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ] ; CGSCC-NEXT: [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20 -; CGSCC-NEXT: br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]] -; CGSCC: for.cond.cleanup15: -; CGSCC-NEXT: br label [[FOR_END23:%.*]] -; CGSCC: for.body16: +; CGSCC-NEXT: br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]] +; CGSCC: [[FOR_COND_CLEANUP15]]: +; CGSCC-NEXT: br label %[[FOR_END23:.*]] +; CGSCC: [[FOR_BODY16]]: ; CGSCC-NEXT: [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10 ; CGSCC-NEXT: [[I20:%.*]] = add nuw nsw i64 [[I19]], 2 ; CGSCC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I20]] -; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[TBAA20]] -; 
CGSCC-NEXT: br label [[FOR_INC21]] -; CGSCC: for.inc21: +; CGSCC-NEXT: store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[LONG_LONG_TBAA20]] +; CGSCC-NEXT: br label %[[FOR_INC21]] +; CGSCC: [[FOR_INC21]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; CGSCC-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]] -; CGSCC: for.end23: +; CGSCC-NEXT: br label %[[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]] +; CGSCC: [[FOR_END23]]: ; CGSCC-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 1023 -; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]] +; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[CHAR_TBAA15]] ; CGSCC-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 500 ; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR21]] -; CGSCC-NEXT: br label [[FOR_COND27:%.*]] -; CGSCC: for.cond27: -; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ] +; CGSCC-NEXT: br label %[[FOR_COND27:.*]] +; CGSCC: [[FOR_COND27]]: +; CGSCC-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC35:.*]] ], [ 0, %[[FOR_END23]] ] ; CGSCC-NEXT: [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024 -; CGSCC-NEXT: br i1 [[EXITCOND14]], label [[FOR_BODY30:%.*]], label [[FOR_COND_CLEANUP29:%.*]] -; CGSCC: for.cond.cleanup29: -; CGSCC-NEXT: br label [[FOR_END37:%.*]] -; CGSCC: for.body30: +; CGSCC-NEXT: br i1 [[EXITCOND14]], label %[[FOR_BODY30:.*]], label %[[FOR_COND_CLEANUP29:.*]] +; CGSCC: [[FOR_COND_CLEANUP29]]: +; CGSCC-NEXT: br label %[[FOR_END37:.*]] +; CGSCC: [[FOR_BODY30]]: ; CGSCC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[INDVARS_IV12]] -; CGSCC-NEXT: [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]] +; CGSCC-NEXT: [[I22:%.*]] = load i8, ptr 
[[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA15]] ; CGSCC-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]] -; CGSCC-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[TBAA15]] -; CGSCC-NEXT: br label [[FOR_INC35]] -; CGSCC: for.inc35: +; CGSCC-NEXT: store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[CHAR_TBAA15]] +; CGSCC-NEXT: br label %[[FOR_INC35]] +; CGSCC: [[FOR_INC35]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1 -; CGSCC-NEXT: br label [[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]] -; CGSCC: for.end37: +; CGSCC-NEXT: br label %[[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]] +; CGSCC: [[FOR_END37]]: ; CGSCC-NEXT: ret void ; entry: @@ -1524,40 +1524,40 @@ for.end37: ; preds = %for.cond.cleanup29 ; } ; define i32 @local_alloca_not_simplifiable_1() { -; TUNIT-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_1() { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define i32 @local_alloca_not_simplifiable_1() { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[X:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[Y:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]]) #[[ATTR17]] ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) #[[ATTR17]] -; TUNIT-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[TBAA3]] -; TUNIT-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[TBAA3]] +; TUNIT-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]] +; TUNIT-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: call void @escape(ptr noundef nonnull align 4 dereferenceable(4) [[X]]) ; TUNIT-NEXT: call void @write_random(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Y]]) -; TUNIT-NEXT: [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA3]] +; TUNIT-NEXT: [[I3:%.*]] 
= load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[I3]], 0 ; TUNIT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL_NOT]], i32 2, i32 1 -; TUNIT-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA3]] +; TUNIT-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[I3]], [[I4]] ; TUNIT-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[COND]] ; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) ; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]]) ; TUNIT-NEXT: ret i32 [[ADD1]] ; -; CGSCC-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_1() { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define i32 @local_alloca_not_simplifiable_1() { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[X:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: [[Y:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]]) #[[ATTR20]] ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) #[[ATTR20]] -; CGSCC-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[TBAA3]] -; CGSCC-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[TBAA3]] +; CGSCC-NEXT: store i32 1, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]] +; CGSCC-NEXT: store i32 1, ptr [[X]], align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: call void @escape(ptr noundef nonnull align 4 dereferenceable(4) [[X]]) ; CGSCC-NEXT: call void @write_random(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Y]]) -; CGSCC-NEXT: [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA3]] +; CGSCC-NEXT: [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[I3]], 0 ; CGSCC-NEXT: 
[[COND:%.*]] = select i1 [[TOBOOL_NOT]], i32 2, i32 1 -; CGSCC-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA3]] +; CGSCC-NEXT: [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[I3]], [[I4]] ; CGSCC-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[COND]] ; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) @@ -1586,20 +1586,20 @@ entry: define i8 @local_alloca_not_simplifiable_2(i64 %index1, i64 %index2, i1 %cnd) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_2 -; CHECK-SAME: (i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i8 @local_alloca_not_simplifiable_2( +; CHECK-SAME: i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CHECK-NEXT: store i8 7, ptr [[BYTES]], align 16 -; CHECK-NEXT: br i1 [[CND]], label [[LEFT:%.*]], label [[RIGHT:%.*]] -; CHECK: left: +; CHECK-NEXT: br i1 [[CND]], label %[[LEFT:.*]], label %[[RIGHT:.*]] +; CHECK: [[LEFT]]: ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDEX1]] -; CHECK-NEXT: br label [[JOIN:%.*]] -; CHECK: right: +; CHECK-NEXT: br label %[[JOIN:.*]] +; CHECK: [[RIGHT]]: ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDEX2]] -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: -; CHECK-NEXT: [[GEP_JOIN:%.*]] = phi ptr [ [[GEP1]], [[LEFT]] ], [ [[GEP2]], [[RIGHT]] ] +; CHECK-NEXT: br label %[[JOIN]] +; CHECK: [[JOIN]]: +; CHECK-NEXT: [[GEP_JOIN:%.*]] = phi ptr [ [[GEP1]], %[[LEFT]] ], [ [[GEP2]], %[[RIGHT]] ] ; CHECK-NEXT: store i8 9, ptr [[GEP_JOIN]], align 4 ; CHECK-NEXT: [[I:%.*]] = load i8, ptr 
[[BYTES]], align 16 ; CHECK-NEXT: ret i8 [[I]] @@ -1630,9 +1630,9 @@ join: ; preds = %right, %left ; We could simplify these if we separate accessed bins wrt. alignment (here mod 4). define i32 @unknown_access_mixed_simplifiable(i32 %arg1, i32 %arg2) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_simplifiable -; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @unknown_access_mixed_simplifiable( +; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[S]], i32 [[ARG1]] @@ -1666,9 +1666,9 @@ entry: ; The access to bc4b could go anywhere, nothing is simplifiable. define i32 @unknown_access_mixed_not_simplifiable(i32 %arg1, i32 %arg2, i32 %arg3) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_not_simplifiable -; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @unknown_access_mixed_not_simplifiable( +; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[S]], i32 [[ARG1]] @@ -1716,17 +1716,17 @@ declare void @escape(ptr) ; define i32 @global_not_simplifiable_1(i32 %cnd) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) -; TUNIT-LABEL: define 
{{[^@]+}}@global_not_simplifiable_1 -; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[TBAA3]] +; TUNIT-LABEL: define i32 @global_not_simplifiable_1( +; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] { +; TUNIT-NEXT: [[ENTRY:.*:]] +; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: ret i32 [[I]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) -; CGSCC-LABEL: define {{[^@]+}}@global_not_simplifiable_1 -; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[TBAA3]] +; CGSCC-LABEL: define i32 @global_not_simplifiable_1( +; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*:]] +; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: ret i32 [[I]] ; entry: @@ -1744,15 +1744,15 @@ entry: ; } ; define i32 @static_global_not_simplifiable_1(i32 %cnd) { -; CHECK-LABEL: define {{[^@]+}}@static_global_not_simplifiable_1 -; CHECK-SAME: (i32 [[CND:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @static_global_not_simplifiable_1( +; CHECK-SAME: i32 [[CND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @sync() ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: ; CHECK-NEXT: ret i32 1 ; entry: @@ -1780,13 +1780,13 @@ declare void @sync() ; return v; ; } define i32 @static_global_simplifiable_4(i32 %cnd) { -; CHECK-LABEL: define {{[^@]+}}@static_global_simplifiable_4 -; CHECK-SAME: (i32 [[CND:%.*]]) { -; CHECK-NEXT: entry: -; 
CHECK-NEXT: store i32 1, ptr @Flag2, align 4, !tbaa [[TBAA3]] +; CHECK-LABEL: define noundef i32 @static_global_simplifiable_4( +; CHECK-SAME: i32 [[CND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store i32 1, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: call void @sync() -; CHECK-NEXT: [[I:%.*]] = load i32, ptr @Flag2, align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 2, ptr @Flag2, align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[I:%.*]] = load i32, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: store i32 2, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: ret i32 [[I]] ; entry: @@ -1806,22 +1806,22 @@ entry: ; return v; ; } define i32 @static_global_not_simplifiable_2(i32 %cnd) { -; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2 -; TUNIT-SAME: (i32 [[CND:%.*]]) { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[TBAA3]] +; TUNIT-LABEL: define noundef i32 @static_global_not_simplifiable_2( +; TUNIT-SAME: i32 [[CND:%.*]]) { +; TUNIT-NEXT: [[ENTRY:.*:]] +; TUNIT-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: call void @sync() #[[ATTR19:[0-9]+]] -; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[TBAA3]] -; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]] +; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] +; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: ret i32 [[I]] ; -; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2 -; CGSCC-SAME: (i32 [[CND:%.*]]) { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[TBAA3]] +; CGSCC-LABEL: define noundef i32 @static_global_not_simplifiable_2( +; CGSCC-SAME: i32 [[CND:%.*]]) { +; CGSCC-NEXT: [[ENTRY:.*:]] +; CGSCC-NEXT: store i32 1, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: call void @sync() #[[ATTR22:[0-9]+]] -; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[TBAA3]] -; 
CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]] +; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] +; CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: ret i32 [[I]] ; entry: @@ -1833,15 +1833,15 @@ entry: } define void @static_global_not_simplifiable_2_helper() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper -; TUNIT-SAME: () #[[ATTR5]] { -; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]] +; TUNIT-LABEL: define void @static_global_not_simplifiable_2_helper( +; TUNIT-SAME: ) #[[ATTR5]] { +; TUNIT-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper -; CGSCC-SAME: () #[[ATTR6]] { -; CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]] +; CGSCC-LABEL: define void @static_global_not_simplifiable_2_helper( +; CGSCC-SAME: ) #[[ATTR6]] { +; CGSCC-NEXT: store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: ret void ; store i32 2, ptr @Flag4, align 4, !tbaa !3 @@ -1851,19 +1851,19 @@ define void @static_global_not_simplifiable_2_helper() { ; Similiar to static_global_simplifiable_3 but with a may-store. 
define i32 @static_global_not_simplifiable_3(i1 %c, ptr %p) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_3 -; TUNIT-SAME: (i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR3]] { +; TUNIT-LABEL: define noundef i32 @static_global_not_simplifiable_3( +; TUNIT-SAME: i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr @Flag3, ptr [[P]] -; TUNIT-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[TBAA3]] -; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[TBAA3]] +; TUNIT-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[INT_TBAA3]] +; TUNIT-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]] ; TUNIT-NEXT: ret i32 [[I]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_3 -; CGSCC-SAME: (i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR5]] { +; CGSCC-LABEL: define noundef i32 @static_global_not_simplifiable_3( +; CGSCC-SAME: i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr @Flag3, ptr [[P]] -; CGSCC-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[TBAA3]] -; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[TBAA3]] +; CGSCC-NEXT: store i32 1, ptr [[SEL]], align 4, !tbaa [[INT_TBAA3]] +; CGSCC-NEXT: [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]] ; CGSCC-NEXT: ret i32 [[I]] ; %sel = select i1 %c, ptr @Flag3, ptr %p @@ -1887,15 +1887,15 @@ define i32 @static_global_not_simplifiable_3(i1 %c, ptr %p) { ; FIXME: We could replace these loads. 
define i32 @write_read_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@write_read_global -; TUNIT-SAME: () #[[ATTR3]] { +; TUNIT-LABEL: define i32 @write_read_global( +; TUNIT-SAME: ) #[[ATTR3]] { ; TUNIT-NEXT: store i32 7, ptr @Gint1, align 4 ; TUNIT-NEXT: [[L:%.*]] = load i32, ptr @Gint1, align 4 ; TUNIT-NEXT: ret i32 [[L]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@write_read_global -; CGSCC-SAME: () #[[ATTR5]] { +; CGSCC-LABEL: define i32 @write_read_global( +; CGSCC-SAME: ) #[[ATTR5]] { ; CGSCC-NEXT: store i32 7, ptr @Gint1, align 4 ; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @Gint1, align 4 ; CGSCC-NEXT: ret i32 [[L]] @@ -1906,14 +1906,14 @@ define i32 @write_read_global() { } define void @write_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@write_global -; TUNIT-SAME: () #[[ATTR5]] { +; TUNIT-LABEL: define void @write_global( +; TUNIT-SAME: ) #[[ATTR5]] { ; TUNIT-NEXT: store i32 7, ptr @Gint2, align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@write_global -; CGSCC-SAME: () #[[ATTR6]] { +; CGSCC-LABEL: define void @write_global( +; CGSCC-SAME: ) #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, ptr @Gint2, align 4 ; CGSCC-NEXT: ret void ; @@ -1922,14 +1922,14 @@ define void @write_global() { } define i32 @read_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) -; TUNIT-LABEL: define {{[^@]+}}@read_global -; TUNIT-SAME: () #[[ATTR6]] { +; TUNIT-LABEL: define i32 @read_global( +; TUNIT-SAME: ) #[[ATTR6]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, ptr @Gint2, align 4 ; TUNIT-NEXT: ret i32 [[L]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn 
memory(read) -; CGSCC-LABEL: define {{[^@]+}}@read_global -; CGSCC-SAME: () #[[ATTR7]] { +; CGSCC-LABEL: define i32 @read_global( +; CGSCC-SAME: ) #[[ATTR7]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @Gint2, align 4 ; CGSCC-NEXT: ret i32 [[L]] ; @@ -1938,13 +1938,13 @@ define i32 @read_global() { } define i32 @write_read_static_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@write_read_static_global -; TUNIT-SAME: () #[[ATTR5]] { +; TUNIT-LABEL: define noundef i32 @write_read_static_global( +; TUNIT-SAME: ) #[[ATTR5]] { ; TUNIT-NEXT: ret i32 7 ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@write_read_static_global -; CGSCC-SAME: () #[[ATTR6]] { +; CGSCC-LABEL: define noundef i32 @write_read_static_global( +; CGSCC-SAME: ) #[[ATTR6]] { ; CGSCC-NEXT: ret i32 7 ; store i32 7, ptr @Gstatic_int1 @@ -1953,14 +1953,14 @@ define i32 @write_read_static_global() { } define void @write_static_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@write_static_global -; TUNIT-SAME: () #[[ATTR5]] { +; TUNIT-LABEL: define void @write_static_global( +; TUNIT-SAME: ) #[[ATTR5]] { ; TUNIT-NEXT: store i32 7, ptr @Gstatic_int2, align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@write_static_global -; CGSCC-SAME: () #[[ATTR6]] { +; CGSCC-LABEL: define void @write_static_global( +; CGSCC-SAME: ) #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, ptr @Gstatic_int2, align 4 ; CGSCC-NEXT: ret void ; @@ -1969,14 +1969,14 @@ define void @write_static_global() { } define i32 @read_static_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) -; TUNIT-LABEL: define 
{{[^@]+}}@read_static_global -; TUNIT-SAME: () #[[ATTR6]] { +; TUNIT-LABEL: define noundef i32 @read_static_global( +; TUNIT-SAME: ) #[[ATTR6]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, ptr @Gstatic_int2, align 4 ; TUNIT-NEXT: ret i32 [[L]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) -; CGSCC-LABEL: define {{[^@]+}}@read_static_global -; CGSCC-SAME: () #[[ATTR7]] { +; CGSCC-LABEL: define noundef i32 @read_static_global( +; CGSCC-SAME: ) #[[ATTR7]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @Gstatic_int2, align 4 ; CGSCC-NEXT: ret i32 [[L]] ; @@ -1985,13 +1985,13 @@ define i32 @read_static_global() { } define i32 @write_read_static_undef_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@write_read_static_undef_global -; TUNIT-SAME: () #[[ATTR5]] { +; TUNIT-LABEL: define noundef i32 @write_read_static_undef_global( +; TUNIT-SAME: ) #[[ATTR5]] { ; TUNIT-NEXT: ret i32 7 ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@write_read_static_undef_global -; CGSCC-SAME: () #[[ATTR6]] { +; CGSCC-LABEL: define noundef i32 @write_read_static_undef_global( +; CGSCC-SAME: ) #[[ATTR6]] { ; CGSCC-NEXT: ret i32 7 ; store i32 7, ptr @Gstatic_undef_int1 @@ -2000,13 +2000,13 @@ define i32 @write_read_static_undef_global() { } define void @write_static_undef_global() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; TUNIT-LABEL: define {{[^@]+}}@write_static_undef_global -; TUNIT-SAME: () #[[ATTR5]] { +; TUNIT-LABEL: define void @write_static_undef_global( +; TUNIT-SAME: ) #[[ATTR5]] { ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) -; CGSCC-LABEL: define {{[^@]+}}@write_static_undef_global -; CGSCC-SAME: () #[[ATTR6]] { +; CGSCC-LABEL: define void 
@write_static_undef_global( +; CGSCC-SAME: ) #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, ptr @Gstatic_undef_int2, align 4 ; CGSCC-NEXT: ret void ; @@ -2015,8 +2015,8 @@ define void @write_static_undef_global() { } define i32 @read_static_undef_global() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@read_static_undef_global -; CHECK-SAME: () #[[ATTR4]] { +; CHECK-LABEL: define i32 @read_static_undef_global( +; CHECK-SAME: ) #[[ATTR4]] { ; CHECK-NEXT: ret i32 7 ; %l = load i32, ptr @Gstatic_undef_int2 @@ -2025,8 +2025,8 @@ define i32 @read_static_undef_global() { define i32 @single_read_of_static_global() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@single_read_of_static_global -; CHECK-SAME: () #[[ATTR4]] { +; CHECK-LABEL: define noundef i32 @single_read_of_static_global( +; CHECK-SAME: ) #[[ATTR4]] { ; CHECK-NEXT: ret i32 0 ; %l = load i32, ptr @Gstatic_int3 @@ -2035,20 +2035,20 @@ define i32 @single_read_of_static_global() { define i8 @phi_store() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@phi_store -; CHECK-SAME: () #[[ATTR4]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i8 @phi_store( +; CHECK-SAME: ) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[A:%.*]] = alloca i16, align 2 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ] ; CHECK-NEXT: store i8 1, ptr [[P]], align 1 ; CHECK-NEXT: [[G]] = getelementptr i8, ptr [[P]], i64 1 ; 
CHECK-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 2 -; CHECK-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]] -; CHECK: end: +; CHECK-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]] +; CHECK: [[END]]: ; CHECK-NEXT: [[S:%.*]] = getelementptr i8, ptr [[A]], i64 1 ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[S]], align 1 ; CHECK-NEXT: ret i8 [[L]] @@ -2074,19 +2074,19 @@ end: define i8 @phi_no_store_1() { ; ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_1 -; TUNIT-SAME: () #[[ATTR3]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: br label [[LOOP:%.*]] -; TUNIT: loop: -; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a1, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] -; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; TUNIT-LABEL: define i8 @phi_no_store_1( +; TUNIT-SAME: ) #[[ATTR3]] { +; TUNIT-NEXT: [[ENTRY:.*]]: +; TUNIT-NEXT: br label %[[LOOP:.*]] +; TUNIT: [[LOOP]]: +; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a1, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ] +; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ] ; TUNIT-NEXT: store i8 1, ptr [[P]], align 1 ; TUNIT-NEXT: [[G]] = getelementptr i8, ptr [[P]], i64 1 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 3 -; TUNIT-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]] -; TUNIT: end: +; TUNIT-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]] +; TUNIT: [[END]]: ; TUNIT-NEXT: [[S11:%.*]] = getelementptr i8, ptr @a1, i64 2 ; TUNIT-NEXT: [[L11:%.*]] = load i8, ptr [[S11]], align 2 ; TUNIT-NEXT: [[S12:%.*]] = getelementptr i8, ptr @a1, i64 3 @@ -2095,19 +2095,19 @@ define i8 @phi_no_store_1() { ; TUNIT-NEXT: ret i8 [[ADD]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_1 -; CGSCC-SAME: () #[[ATTR5]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: br label [[LOOP:%.*]] -; 
CGSCC: loop: -; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a1, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] -; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; CGSCC-LABEL: define i8 @phi_no_store_1( +; CGSCC-SAME: ) #[[ATTR5]] { +; CGSCC-NEXT: [[ENTRY:.*]]: +; CGSCC-NEXT: br label %[[LOOP:.*]] +; CGSCC: [[LOOP]]: +; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a1, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ] +; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ] ; CGSCC-NEXT: store i8 1, ptr [[P]], align 1 ; CGSCC-NEXT: [[G]] = getelementptr i8, ptr [[P]], i64 1 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 3 -; CGSCC-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]] -; CGSCC: end: +; CGSCC-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]] +; CGSCC: [[END]]: ; CGSCC-NEXT: [[S11:%.*]] = getelementptr i8, ptr @a1, i64 2 ; CGSCC-NEXT: [[L11:%.*]] = load i8, ptr [[S11]], align 2 ; CGSCC-NEXT: [[S12:%.*]] = getelementptr i8, ptr @a1, i64 3 @@ -2138,19 +2138,19 @@ end: define i8 @phi_no_store_2() { ; ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_2 -; TUNIT-SAME: () #[[ATTR3]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: br label [[LOOP:%.*]] -; TUNIT: loop: -; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] -; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; TUNIT-LABEL: define i8 @phi_no_store_2( +; TUNIT-SAME: ) #[[ATTR3]] { +; TUNIT-NEXT: [[ENTRY:.*]]: +; TUNIT-NEXT: br label %[[LOOP:.*]] +; TUNIT: [[LOOP]]: +; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a2, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ] +; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ] ; TUNIT-NEXT: store i8 1, ptr [[P]], align 1 ; TUNIT-NEXT: [[G]] = getelementptr i8, ptr @a2, i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 -; TUNIT-NEXT: br 
i1 [[C]], label [[END:%.*]], label [[LOOP]] -; TUNIT: end: +; TUNIT-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]] +; TUNIT: [[END]]: ; TUNIT-NEXT: [[S21:%.*]] = getelementptr i8, ptr @a2, i64 2 ; TUNIT-NEXT: [[L21:%.*]] = load i8, ptr [[S21]], align 2 ; TUNIT-NEXT: [[S22:%.*]] = getelementptr i8, ptr @a2, i64 3 @@ -2159,19 +2159,19 @@ define i8 @phi_no_store_2() { ; TUNIT-NEXT: ret i8 [[ADD]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_2 -; CGSCC-SAME: () #[[ATTR5]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: br label [[LOOP:%.*]] -; CGSCC: loop: -; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] -; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; CGSCC-LABEL: define i8 @phi_no_store_2( +; CGSCC-SAME: ) #[[ATTR5]] { +; CGSCC-NEXT: [[ENTRY:.*]]: +; CGSCC-NEXT: br label %[[LOOP:.*]] +; CGSCC: [[LOOP]]: +; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a2, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ] +; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ] ; CGSCC-NEXT: store i8 1, ptr [[P]], align 1 ; CGSCC-NEXT: [[G]] = getelementptr i8, ptr @a2, i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 -; CGSCC-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]] -; CGSCC: end: +; CGSCC-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]] +; CGSCC: [[END]]: ; CGSCC-NEXT: [[S21:%.*]] = getelementptr i8, ptr @a2, i64 2 ; CGSCC-NEXT: [[L21:%.*]] = load i8, ptr [[S21]], align 2 ; CGSCC-NEXT: [[S22:%.*]] = getelementptr i8, ptr @a2, i64 3 @@ -2200,21 +2200,21 @@ end: define i8 @phi_no_store_3() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_3 -; TUNIT-SAME: () #[[ATTR3]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define i8 @phi_no_store_3( +; TUNIT-SAME: ) #[[ATTR3]] { +; TUNIT-NEXT: [[ENTRY:.*]]: ; 
TUNIT-NEXT: [[S30:%.*]] = getelementptr i8, ptr @a3, i64 3 ; TUNIT-NEXT: store i8 0, ptr [[S30]], align 1 -; TUNIT-NEXT: br label [[LOOP:%.*]] -; TUNIT: loop: -; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] -; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; TUNIT-NEXT: br label %[[LOOP:.*]] +; TUNIT: [[LOOP]]: +; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a3, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ] +; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ] ; TUNIT-NEXT: store i8 1, ptr [[P]], align 1 ; TUNIT-NEXT: [[G]] = getelementptr i8, ptr @a3, i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 -; TUNIT-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]] -; TUNIT: end: +; TUNIT-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]] +; TUNIT: [[END]]: ; TUNIT-NEXT: [[S31:%.*]] = getelementptr i8, ptr @a3, i64 2 ; TUNIT-NEXT: [[L31:%.*]] = load i8, ptr [[S31]], align 2 ; TUNIT-NEXT: [[S32:%.*]] = getelementptr i8, ptr @a3, i64 3 @@ -2226,21 +2226,21 @@ define i8 @phi_no_store_3() { ; TUNIT-NEXT: ret i8 [[ADD2]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_3 -; CGSCC-SAME: () #[[ATTR5]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define i8 @phi_no_store_3( +; CGSCC-SAME: ) #[[ATTR5]] { +; CGSCC-NEXT: [[ENTRY:.*]]: ; CGSCC-NEXT: [[S30:%.*]] = getelementptr i8, ptr @a3, i64 3 ; CGSCC-NEXT: store i8 0, ptr [[S30]], align 1 -; CGSCC-NEXT: br label [[LOOP:%.*]] -; CGSCC: loop: -; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] -; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; CGSCC-NEXT: br label %[[LOOP:.*]] +; CGSCC: [[LOOP]]: +; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a3, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ] +; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ] ; CGSCC-NEXT: store i8 1, ptr 
[[P]], align 1 ; CGSCC-NEXT: [[G]] = getelementptr i8, ptr @a3, i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 -; CGSCC-NEXT: br i1 [[C]], label [[END:%.*]], label [[LOOP]] -; CGSCC: end: +; CGSCC-NEXT: br i1 [[C]], label %[[END:.*]], label %[[LOOP]] +; CGSCC: [[END]]: ; CGSCC-NEXT: [[S31:%.*]] = getelementptr i8, ptr @a3, i64 2 ; CGSCC-NEXT: [[L31:%.*]] = load i8, ptr [[S31]], align 2 ; CGSCC-NEXT: [[S32:%.*]] = getelementptr i8, ptr @a3, i64 3 @@ -2277,15 +2277,15 @@ end: define i8 @cast_and_load_1() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@cast_and_load_1 -; TUNIT-SAME: () #[[ATTR3]] { +; TUNIT-LABEL: define i8 @cast_and_load_1( +; TUNIT-SAME: ) #[[ATTR3]] { ; TUNIT-NEXT: store i32 42, ptr @bytes1, align 4 ; TUNIT-NEXT: [[L:%.*]] = load i8, ptr @bytes1, align 4 ; TUNIT-NEXT: ret i8 [[L]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@cast_and_load_1 -; CGSCC-SAME: () #[[ATTR5]] { +; CGSCC-LABEL: define i8 @cast_and_load_1( +; CGSCC-SAME: ) #[[ATTR5]] { ; CGSCC-NEXT: store i32 42, ptr @bytes1, align 4 ; CGSCC-NEXT: [[L:%.*]] = load i8, ptr @bytes1, align 4 ; CGSCC-NEXT: ret i8 [[L]] @@ -2297,15 +2297,15 @@ define i8 @cast_and_load_1() { define i64 @cast_and_load_2() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@cast_and_load_2 -; TUNIT-SAME: () #[[ATTR3]] { +; TUNIT-LABEL: define i64 @cast_and_load_2( +; TUNIT-SAME: ) #[[ATTR3]] { ; TUNIT-NEXT: store i32 42, ptr @bytes2, align 4 ; TUNIT-NEXT: [[L:%.*]] = load i64, ptr @bytes2, align 4 ; TUNIT-NEXT: ret i64 [[L]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@cast_and_load_2 -; CGSCC-SAME: () #[[ATTR5]] { +; CGSCC-LABEL: define i64 @cast_and_load_2( +; CGSCC-SAME: ) #[[ATTR5]] { ; 
CGSCC-NEXT: store i32 42, ptr @bytes2, align 4 ; CGSCC-NEXT: [[L:%.*]] = load i64, ptr @bytes2, align 4 ; CGSCC-NEXT: ret i64 [[L]] @@ -2318,33 +2318,33 @@ define i64 @cast_and_load_2() { define void @recursive_load_store(i64 %N, i32 %v) { ; ; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(write) -; TUNIT-LABEL: define {{[^@]+}}@recursive_load_store -; TUNIT-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: br label [[FOR_COND:%.*]] -; TUNIT: for.cond: -; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; TUNIT-LABEL: define void @recursive_load_store( +; TUNIT-SAME: i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] { +; TUNIT-NEXT: [[ENTRY:.*]]: +; TUNIT-NEXT: br label %[[FOR_COND:.*]] +; TUNIT: [[FOR_COND]]: +; TUNIT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY:.*]] ], [ 0, %[[ENTRY]] ] ; TUNIT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]] -; TUNIT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; TUNIT: for.body: +; TUNIT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]] +; TUNIT: [[FOR_BODY]]: ; TUNIT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TUNIT-NEXT: br label [[FOR_COND]] -; TUNIT: for.end: +; TUNIT-NEXT: br label %[[FOR_COND]] +; TUNIT: [[FOR_END]]: ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nofree norecurse nosync nounwind memory(write) -; CGSCC-LABEL: define {{[^@]+}}@recursive_load_store -; CGSCC-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR8:[0-9]+]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: br label [[FOR_COND:%.*]] -; CGSCC: for.cond: -; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CGSCC-LABEL: define void @recursive_load_store( +; CGSCC-SAME: i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR8:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*]]: +; CGSCC-NEXT: br label %[[FOR_COND:.*]] 
+; CGSCC: [[FOR_COND]]: +; CGSCC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY:.*]] ], [ 0, %[[ENTRY]] ] ; CGSCC-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]] -; CGSCC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; CGSCC: for.body: +; CGSCC-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]] +; CGSCC: [[FOR_BODY]]: ; CGSCC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CGSCC-NEXT: br label [[FOR_COND]] -; CGSCC: for.end: +; CGSCC-NEXT: br label %[[FOR_COND]] +; CGSCC: [[FOR_END]]: ; CGSCC-NEXT: ret void ; entry: @@ -2369,9 +2369,9 @@ for.end: } define dso_local i32 @round_trip_malloc(i32 %x) { -; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc -; CHECK-SAME: (i32 returned [[X:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local i32 @round_trip_malloc( +; CHECK-SAME: i32 returned [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4 ; CHECK-NEXT: ret i32 [[X]] @@ -2385,8 +2385,8 @@ entry: } define dso_local i32 @round_trip_malloc_constant() { -; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc_constant() { -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local noundef i32 @round_trip_malloc_constant() { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: ret i32 7 ; entry: @@ -2402,16 +2402,16 @@ declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0) " declare void @free(ptr) allockind("free") "alloc-family"="malloc" define dso_local i32 @conditional_malloc(i32 %x) { -; CHECK-LABEL: define {{[^@]+}}@conditional_malloc -; CHECK-SAME: (i32 returned [[X:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local i32 @conditional_malloc( +; CHECK-SAME: i32 returned [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 -; CHECK-NEXT: br i1 
[[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: ; CHECK-NEXT: ret i32 [[X]] ; entry: @@ -2429,9 +2429,9 @@ if.end: ; preds = %if.then, %entry } define dso_local i32 @round_trip_calloc(i32 %x) { -; CHECK-LABEL: define {{[^@]+}}@round_trip_calloc -; CHECK-SAME: (i32 returned [[X:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local i32 @round_trip_calloc( +; CHECK-SAME: i32 returned [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false) ; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4 @@ -2445,8 +2445,8 @@ entry: } define dso_local i32 @round_trip_calloc_constant() { -; CHECK-LABEL: define {{[^@]+}}@round_trip_calloc_constant() { -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local noundef i32 @round_trip_calloc_constant() { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false) ; CHECK-NEXT: ret i32 11 @@ -2461,17 +2461,17 @@ entry: declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0, 1) "alloc-family"="malloc" define dso_local i32 @conditional_calloc(i32 %x) { -; CHECK-LABEL: define {{[^@]+}}@conditional_calloc -; CHECK-SAME: (i32 [[X:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local i32 @conditional_calloc( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false) ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 -; CHECK-NEXT: 
br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 [[X]], ptr [[CALL_H2S]], align 4 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL_H2S]], align 4 ; CHECK-NEXT: ret i32 [[TMP0]] ; @@ -2491,15 +2491,15 @@ if.end: ; preds = %if.then, %entry } define dso_local i32 @conditional_calloc_zero(i1 %c) { -; CHECK-LABEL: define {{[^@]+}}@conditional_calloc_zero -; CHECK-SAME: (i1 [[C:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local noundef i32 @conditional_calloc_zero( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false) -; CHECK-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: +; CHECK-NEXT: br i1 [[C]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: ; CHECK-NEXT: ret i32 0 ; entry: @@ -2517,16 +2517,16 @@ if.end: ; preds = %if.then, %entry } define dso_local ptr @malloc_like(i32 %s) { -; TUNIT-LABEL: define {{[^@]+}}@malloc_like -; TUNIT-SAME: (i32 [[S:%.*]]) { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define dso_local noalias ptr @malloc_like( +; TUNIT-SAME: i32 [[S:%.*]]) { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64 ; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR20:[0-9]+]] ; TUNIT-NEXT: ret ptr [[CALL]] ; -; CGSCC-LABEL: define {{[^@]+}}@malloc_like -; CGSCC-SAME: (i32 [[S:%.*]]) { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define dso_local noalias ptr @malloc_like( +; CGSCC-SAME: i32 [[S:%.*]]) { +; CGSCC-NEXT: [[ENTRY:.*:]] ; 
CGSCC-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64 ; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR23:[0-9]+]] ; CGSCC-NEXT: ret ptr [[CALL]] @@ -2538,18 +2538,18 @@ entry: } define dso_local i32 @round_trip_malloc_like(i32 %x) { -; TUNIT-LABEL: define {{[^@]+}}@round_trip_malloc_like -; TUNIT-SAME: (i32 [[X:%.*]]) { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define dso_local i32 @round_trip_malloc_like( +; TUNIT-SAME: i32 [[X:%.*]]) { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR20]] ; TUNIT-NEXT: store i32 [[X]], ptr [[CALL]], align 4 ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 ; TUNIT-NEXT: call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]] ; TUNIT-NEXT: ret i32 [[TMP0]] ; -; CGSCC-LABEL: define {{[^@]+}}@round_trip_malloc_like -; CGSCC-SAME: (i32 [[X:%.*]]) { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define dso_local i32 @round_trip_malloc_like( +; CGSCC-SAME: i32 [[X:%.*]]) { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR23]] ; CGSCC-NEXT: store i32 [[X]], ptr [[CALL]], align 4 ; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 @@ -2565,18 +2565,18 @@ entry: } define dso_local i32 @round_trip_unknown_alloc(i32 %x) { -; TUNIT-LABEL: define {{[^@]+}}@round_trip_unknown_alloc -; TUNIT-SAME: (i32 [[X:%.*]]) { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define dso_local i32 @round_trip_unknown_alloc( +; TUNIT-SAME: i32 [[X:%.*]]) { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR20]] ; TUNIT-NEXT: store i32 [[X]], ptr [[CALL]], align 4 ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 ; TUNIT-NEXT: call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]] ; TUNIT-NEXT: ret i32 [[TMP0]] ; -; CGSCC-LABEL: define {{[^@]+}}@round_trip_unknown_alloc -; CGSCC-SAME: (i32 
[[X:%.*]]) { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define dso_local i32 @round_trip_unknown_alloc( +; CGSCC-SAME: i32 [[X:%.*]]) { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR23]] ; CGSCC-NEXT: store i32 [[X]], ptr [[CALL]], align 4 ; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 @@ -2594,30 +2594,30 @@ entry: declare noalias ptr @unknown_alloc(i32) define dso_local i32 @conditional_unknown_alloc(i32 %x) { -; TUNIT-LABEL: define {{[^@]+}}@conditional_unknown_alloc -; TUNIT-SAME: (i32 [[X:%.*]]) { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define dso_local i32 @conditional_unknown_alloc( +; TUNIT-SAME: i32 [[X:%.*]]) { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR20]] ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 -; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; TUNIT: if.then: +; TUNIT-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; TUNIT: [[IF_THEN]]: ; TUNIT-NEXT: store i32 [[X]], ptr [[CALL]], align 4 -; TUNIT-NEXT: br label [[IF_END]] -; TUNIT: if.end: +; TUNIT-NEXT: br label %[[IF_END]] +; TUNIT: [[IF_END]]: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 ; TUNIT-NEXT: call void @free(ptr nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]] ; TUNIT-NEXT: ret i32 [[TMP0]] ; -; CGSCC-LABEL: define {{[^@]+}}@conditional_unknown_alloc -; CGSCC-SAME: (i32 [[X:%.*]]) { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define dso_local i32 @conditional_unknown_alloc( +; CGSCC-SAME: i32 [[X:%.*]]) { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR23]] ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 -; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CGSCC: if.then: +; CGSCC-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CGSCC: [[IF_THEN]]: ; 
CGSCC-NEXT: store i32 [[X]], ptr [[CALL]], align 4 -; CGSCC-NEXT: br label [[IF_END]] -; CGSCC: if.end: +; CGSCC-NEXT: br label %[[IF_END]] +; CGSCC: [[IF_END]]: ; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 ; CGSCC-NEXT: call void @free(ptr nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR23]] ; CGSCC-NEXT: ret i32 [[TMP0]] @@ -2643,9 +2643,9 @@ if.end: ; preds = %if.then, %entry ; We mark %dst as writeonly and %src as readonly, that is (for now) all we can expect. define dso_local void @test_nested_memory(ptr %dst, ptr %src) { -; TUNIT-LABEL: define {{[^@]+}}@test_nested_memory -; TUNIT-SAME: (ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define dso_local void @test_nested_memory( +; TUNIT-SAME: ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1 ; TUNIT-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 ; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2 @@ -2662,9 +2662,9 @@ define dso_local void @test_nested_memory(ptr %dst, ptr %src) { ; TUNIT-NEXT: call fastcc void @nested_memory_callee(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR21:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC-LABEL: define {{[^@]+}}@test_nested_memory -; CGSCC-SAME: (ptr nofree [[DST:%.*]], ptr nofree [[SRC:%.*]]) { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define dso_local void @test_nested_memory( +; CGSCC-SAME: ptr nofree [[DST:%.*]], ptr nofree [[SRC:%.*]]) { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 ; CGSCC-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2 ; CGSCC-NEXT: [[CALL:%.*]] = call noalias dereferenceable_or_null(24) ptr @malloc(i64 noundef 24) @@ -2690,9 +2690,9 @@ entry: define internal fastcc 
void @nested_memory_callee(ptr nocapture readonly %S) nofree norecurse nounwind uwtable { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable -; TUNIT-LABEL: define {{[^@]+}}@nested_memory_callee -; TUNIT-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) #[[ATTR11:[0-9]+]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define internal fastcc void @nested_memory_callee( +; TUNIT-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) #[[ATTR11:[0-9]+]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 ; TUNIT-NEXT: store ptr [[TMP0]], ptr [[S_PRIV]], align 8 ; TUNIT-NEXT: [[S_PRIV_B8:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 8 @@ -2700,21 +2700,21 @@ define internal fastcc void @nested_memory_callee(ptr nocapture readonly %S) nof ; TUNIT-NEXT: [[S_PRIV_B16:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 16 ; TUNIT-NEXT: store ptr [[TMP2]], ptr [[S_PRIV_B16]], align 8 ; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[S_PRIV]], i64 0, i32 2 -; TUNIT-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8 +; TUNIT-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8, !invariant.load [[META32:![0-9]+]] ; TUNIT-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP3]], i64 0, i32 2 -; TUNIT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8 +; TUNIT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8, !invariant.load [[META32]] ; TUNIT-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP4]], i64 0, i32 1 -; TUNIT-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8 -; TUNIT-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +; TUNIT-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8, !invariant.load [[META32]] +; TUNIT-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8, !invariant.load [[META32]] ; TUNIT-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float -; TUNIT-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[TMP4]], align 8 +; TUNIT-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8, !invariant.load [[META32]] ; TUNIT-NEXT: store float [[CONV]], ptr [[TMP7]], align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable -; CGSCC-LABEL: define {{[^@]+}}@nested_memory_callee -; CGSCC-SAME: (ptr nofree [[TMP0:%.*]], ptr nofree [[TMP1:%.*]], ptr nofree [[TMP2:%.*]]) #[[ATTR12:[0-9]+]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define internal fastcc void @nested_memory_callee( +; CGSCC-SAME: ptr nofree [[TMP0:%.*]], ptr nofree [[TMP1:%.*]], ptr nofree [[TMP2:%.*]]) #[[ATTR12:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 ; CGSCC-NEXT: store ptr [[TMP0]], ptr [[S_PRIV]], align 8 ; CGSCC-NEXT: [[S_PRIV_B8:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 8 @@ -2722,14 +2722,14 @@ define internal fastcc void @nested_memory_callee(ptr nocapture readonly %S) nof ; CGSCC-NEXT: [[S_PRIV_B16:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 16 ; CGSCC-NEXT: store ptr [[TMP2]], ptr [[S_PRIV_B16]], align 8 ; CGSCC-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[S_PRIV]], i64 0, i32 2 -; CGSCC-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8 +; CGSCC-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INNER]], align 8, !invariant.load [[META32:![0-9]+]] ; CGSCC-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP3]], i64 0, i32 2 -; CGSCC-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8 +; CGSCC-NEXT: [[TMP4:%.*]] = load ptr, ptr [[INNER1]], align 8, !invariant.load [[META32]] ; CGSCC-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[TMP4]], i64 0, i32 1 -; CGSCC-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8 -; CGSCC-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +; CGSCC-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SRC]], align 8, !invariant.load [[META32]] +; CGSCC-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], 
align 8, !invariant.load [[META32]] ; CGSCC-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float -; CGSCC-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CGSCC-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8, !invariant.load [[META32]] ; CGSCC-NEXT: store float [[CONV]], ptr [[TMP7]], align 4 ; CGSCC-NEXT: ret void ; @@ -2751,34 +2751,34 @@ entry: ; varying and the accesses thus not "exact". This used to simplify %cmp12 to true. define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i32 %idx) #0 { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -; TUNIT-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access -; TUNIT-SAME: (ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define hidden void @no_propagation_of_unknown_index_access( +; TUNIT-SAME: ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT: [[ENTRY:.*]]: ; TUNIT-NEXT: [[BUF:%.*]] = alloca [128 x i32], align 16 ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR17]] -; TUNIT-NEXT: br label [[FOR_COND:%.*]] -; TUNIT: for.cond: -; TUNIT-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; TUNIT-NEXT: br label %[[FOR_COND:.*]] +; TUNIT: [[FOR_COND]]: +; TUNIT-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; TUNIT-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 128 -; TUNIT-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; TUNIT: for.cond.cleanup: -; TUNIT-NEXT: br label [[FOR_COND4:%.*]] -; TUNIT: for.body: +; TUNIT-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; TUNIT: [[FOR_COND_CLEANUP]]: +; TUNIT-NEXT: 
br label %[[FOR_COND4:.*]] +; TUNIT: [[FOR_BODY]]: ; TUNIT-NEXT: [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64 ; TUNIT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[IDXPROM]] -; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !invariant.load [[META32]] ; TUNIT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM]] ; TUNIT-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX2]], align 4 ; TUNIT-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; TUNIT-NEXT: br label [[FOR_COND]], !llvm.loop [[TBAA10]] -; TUNIT: for.cond4: -; TUNIT-NEXT: [[I3_0:%.*]] = phi i32 [ 0, [[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], [[FOR_BODY7:%.*]] ] +; TUNIT-NEXT: br label %[[FOR_COND]], !llvm.loop [[FLOAT_TBAA10]] +; TUNIT: [[FOR_COND4]]: +; TUNIT-NEXT: [[I3_0:%.*]] = phi i32 [ 0, %[[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], %[[FOR_BODY7:.*]] ] ; TUNIT-NEXT: [[CMP5:%.*]] = icmp slt i32 [[I3_0]], 128 -; TUNIT-NEXT: br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6:%.*]] -; TUNIT: for.cond.cleanup6: +; TUNIT-NEXT: br i1 [[CMP5]], label %[[FOR_BODY7]], label %[[FOR_COND_CLEANUP6:.*]] +; TUNIT: [[FOR_COND_CLEANUP6]]: ; TUNIT-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR17]] ; TUNIT-NEXT: ret void -; TUNIT: for.body7: +; TUNIT: [[FOR_BODY7]]: ; TUNIT-NEXT: [[IDXPROM8:%.*]] = sext i32 [[I3_0]] to i64 ; TUNIT-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM8]] ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 @@ -2790,37 +2790,37 @@ define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i3 ; TUNIT-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM8]] ; TUNIT-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX14]], align 4 ; TUNIT-NEXT: [[INC16]] = add nsw i32 [[I3_0]], 1 -; 
TUNIT-NEXT: br label [[FOR_COND4]], !llvm.loop [[TBAA12]] +; TUNIT-NEXT: br label %[[FOR_COND4]], !llvm.loop [[INT_TBAA12]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -; CGSCC-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access -; CGSCC-SAME: (ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR13:[0-9]+]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define hidden void @no_propagation_of_unknown_index_access( +; CGSCC-SAME: ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR13:[0-9]+]] { +; CGSCC-NEXT: [[ENTRY:.*]]: ; CGSCC-NEXT: [[BUF:%.*]] = alloca [128 x i32], align 16 ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR20]] -; CGSCC-NEXT: br label [[FOR_COND:%.*]] -; CGSCC: for.cond: -; CGSCC-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CGSCC-NEXT: br label %[[FOR_COND:.*]] +; CGSCC: [[FOR_COND]]: +; CGSCC-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 128 -; CGSCC-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; CGSCC: for.cond.cleanup: -; CGSCC-NEXT: br label [[FOR_COND4:%.*]] -; CGSCC: for.body: +; CGSCC-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; CGSCC: [[FOR_COND_CLEANUP]]: +; CGSCC-NEXT: br label %[[FOR_COND4:.*]] +; CGSCC: [[FOR_BODY]]: ; CGSCC-NEXT: [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64 ; CGSCC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[IDXPROM]] -; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !invariant.load [[META32]] ; CGSCC-NEXT: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM]] ; CGSCC-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX2]], align 4 ; CGSCC-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; CGSCC-NEXT: br label [[FOR_COND]], !llvm.loop [[TBAA10]] -; CGSCC: for.cond4: -; CGSCC-NEXT: [[I3_0:%.*]] = phi i32 [ 0, [[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], [[FOR_BODY7:%.*]] ] +; CGSCC-NEXT: br label %[[FOR_COND]], !llvm.loop [[FLOAT_TBAA10]] +; CGSCC: [[FOR_COND4]]: +; CGSCC-NEXT: [[I3_0:%.*]] = phi i32 [ 0, %[[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], %[[FOR_BODY7:.*]] ] ; CGSCC-NEXT: [[CMP5:%.*]] = icmp slt i32 [[I3_0]], 128 -; CGSCC-NEXT: br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6:%.*]] -; CGSCC: for.cond.cleanup6: +; CGSCC-NEXT: br i1 [[CMP5]], label %[[FOR_BODY7]], label %[[FOR_COND_CLEANUP6:.*]] +; CGSCC: [[FOR_COND_CLEANUP6]]: ; CGSCC-NEXT: call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR20]] ; CGSCC-NEXT: ret void -; CGSCC: for.body7: +; CGSCC: [[FOR_BODY7]]: ; CGSCC-NEXT: [[IDXPROM8:%.*]] = sext i32 [[I3_0]] to i64 ; CGSCC-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM8]] ; CGSCC-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 @@ -2832,7 +2832,7 @@ define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i3 ; CGSCC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM8]] ; CGSCC-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX14]], align 4 ; CGSCC-NEXT: [[INC16]] = add nsw i32 [[I3_0]], 1 -; CGSCC-NEXT: br label [[FOR_COND4]], !llvm.loop [[TBAA12]] +; CGSCC-NEXT: br label %[[FOR_COND4]], !llvm.loop [[INT_TBAA12]] ; entry: %buf = alloca [128 x i32], align 16 @@ -2883,30 +2883,30 @@ for.body7: ; preds = %for.cond4 ; Ensure we do not return true. 
define internal i1 @alloca_non_unique(ptr %p, i32 %in, i1 %c) { ; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) -; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique -; TUNIT-SAME: (ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR12:[0-9]+]] { +; TUNIT-LABEL: define internal i1 @alloca_non_unique( +; TUNIT-SAME: ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR12:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: store i32 [[IN]], ptr [[A]], align 4 -; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; TUNIT: t: +; TUNIT-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; TUNIT: [[T]]: ; TUNIT-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR14:[0-9]+]] ; TUNIT-NEXT: ret i1 [[R]] -; TUNIT: f: -; TUNIT-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4 +; TUNIT: [[F]]: +; TUNIT-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META32]] ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]] ; TUNIT-NEXT: ret i1 [[CMP]] ; ; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) -; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique -; CGSCC-SAME: (ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR14:[0-9]+]] { +; CGSCC-LABEL: define internal i1 @alloca_non_unique( +; CGSCC-SAME: ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR14:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 [[IN]], ptr [[A]], align 4 -; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; CGSCC: t: +; CGSCC-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; CGSCC: [[T]]: ; CGSCC-NEXT: [[R:%.*]] = call i1 
@alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR17:[0-9]+]] ; CGSCC-NEXT: ret i1 [[R]] -; CGSCC: f: -; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4 +; CGSCC: [[F]]: +; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META32]] ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]] ; CGSCC-NEXT: ret i1 [[CMP]] ; @@ -2925,14 +2925,14 @@ f: ; Ensure we do not return true. define i1 @alloca_non_unique_caller(i32 %in, i1 %c) { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) -; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique_caller -; TUNIT-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR13:[0-9]+]] { +; TUNIT-LABEL: define i1 @alloca_non_unique_caller( +; TUNIT-SAME: i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR13:[0-9]+]] { ; TUNIT-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(ptr undef, i32 [[IN]], i1 noundef [[C]]) #[[ATTR14]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: nofree nosync nounwind memory(none) -; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique_caller -; CGSCC-SAME: (i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR15:[0-9]+]] { +; CGSCC-LABEL: define i1 @alloca_non_unique_caller( +; CGSCC-SAME: i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR15:[0-9]+]] { ; CGSCC-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(ptr nofree undef, i32 [[IN]], i1 noundef [[C]]) #[[ATTR25:[0-9]+]] ; CGSCC-NEXT: ret i1 [[R]] ; @@ -2943,8 +2943,8 @@ define i1 @alloca_non_unique_caller(i32 %in, i1 %c) { ; Ensure we do not return %bad or %l, but %sel define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal -; TUNIT-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR4]] { +; TUNIT-LABEL: define i32 @scope_value_traversal( +; TUNIT-SAME: i32 [[BAD:%.*]], i1 [[C:%.*]], i1 
[[C2:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: store i32 [[BAD]], ptr [[A]], align 4 ; TUNIT-NEXT: call void @scope_value_traversal_helper(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR22:[0-9]+]] @@ -2953,8 +2953,8 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) { ; TUNIT-NEXT: ret i32 [[SEL]] ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@scope_value_traversal -; CGSCC-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR16:[0-9]+]] { +; CGSCC-LABEL: define i32 @scope_value_traversal( +; CGSCC-SAME: i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR16:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 [[BAD]], ptr [[A]], align 4 ; CGSCC-NEXT: call void @scope_value_traversal_helper(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR26:[0-9]+]] @@ -2972,16 +2972,16 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) { define void @scope_value_traversal_helper(ptr %a, i1 %c) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal_helper -; TUNIT-SAME: (ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] { +; TUNIT-LABEL: define void @scope_value_traversal_helper( +; TUNIT-SAME: ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4 ; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[L]], i32 42 ; TUNIT-NEXT: store i32 [[SEL]], ptr [[A]], align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -; CGSCC-LABEL: define 
{{[^@]+}}@scope_value_traversal_helper -; CGSCC-SAME: (ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR13]] { +; CGSCC-LABEL: define void @scope_value_traversal_helper( +; CGSCC-SAME: ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR13]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4 ; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[L]], i32 42 ; CGSCC-NEXT: store i32 [[SEL]], ptr [[A]], align 4 @@ -2995,9 +2995,9 @@ define void @scope_value_traversal_helper(ptr %a, i1 %c) { define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator -; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define noundef i8 @gep_index_from_binary_operator( +; CHECK-SAME: i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12 ; CHECK-NEXT: ret i8 100 @@ -3014,9 +3014,9 @@ entry: define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory -; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i8 @gep_index_from_memory( +; CHECK-SAME: i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12 ; CHECK-NEXT: ret i8 100 @@ -3040,27 +3040,27 @@ entry: ; Ensure this is not flattened to return 3 define i32 @a(i1 %c) { ; TUNIT: 
Function Attrs: nofree nosync nounwind -; TUNIT-LABEL: define {{[^@]+}}@a -; TUNIT-SAME: (i1 noundef [[C:%.*]]) #[[ATTR14]] { +; TUNIT-LABEL: define noundef i32 @a( +; TUNIT-SAME: i1 noundef [[C:%.*]]) #[[ATTR14]] { ; TUNIT-NEXT: store i32 3, ptr @G, align 4 -; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; TUNIT: t: +; TUNIT-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; TUNIT: [[T]]: ; TUNIT-NEXT: [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR14]] -; TUNIT-NEXT: br label [[F]] -; TUNIT: f: +; TUNIT-NEXT: br label %[[F]] +; TUNIT: [[F]]: ; TUNIT-NEXT: [[R:%.*]] = load i32, ptr @G, align 4 ; TUNIT-NEXT: store i32 5, ptr @G, align 4 ; TUNIT-NEXT: ret i32 [[R]] ; ; CGSCC: Function Attrs: nofree nosync nounwind -; CGSCC-LABEL: define {{[^@]+}}@a -; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR17]] { +; CGSCC-LABEL: define noundef i32 @a( +; CGSCC-SAME: i1 noundef [[C:%.*]]) #[[ATTR17]] { ; CGSCC-NEXT: store i32 3, ptr @G, align 4 -; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; CGSCC: t: +; CGSCC-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; CGSCC: [[T]]: ; CGSCC-NEXT: [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR17]] -; CGSCC-NEXT: br label [[F]] -; CGSCC: f: +; CGSCC-NEXT: br label %[[F]] +; CGSCC: [[F]]: ; CGSCC-NEXT: [[R:%.*]] = load i32, ptr @G, align 4 ; CGSCC-NEXT: store i32 5, ptr @G, align 4 ; CGSCC-NEXT: ret i32 [[R]] @@ -3081,22 +3081,22 @@ f: @GC = internal global i32 undef, align 4 define void @atomicrmw(ptr %p, i32 %i, i1 %cnd) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@atomicrmw -; TUNIT-SAME: (ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: br i1 [[CND]], label [[T:%.*]], label [[M:%.*]] -; TUNIT: t: -; TUNIT-NEXT: br label [[M]] -; TUNIT: m: +; TUNIT-LABEL: define void @atomicrmw( +; TUNIT-SAME: ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT: br 
i1 [[CND]], label %[[T:.*]], label %[[M:.*]] +; TUNIT: [[T]]: +; TUNIT-NEXT: br label %[[M]] +; TUNIT: [[M]]: ; TUNIT-NEXT: [[ARMW:%.*]] = atomicrmw add ptr @GC, i32 [[I]] monotonic, align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@atomicrmw -; CGSCC-SAME: (ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR5]] { -; CGSCC-NEXT: br i1 [[CND]], label [[T:%.*]], label [[M:%.*]] -; CGSCC: t: -; CGSCC-NEXT: br label [[M]] -; CGSCC: m: +; CGSCC-LABEL: define void @atomicrmw( +; CGSCC-SAME: ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR5]] { +; CGSCC-NEXT: br i1 [[CND]], label %[[T:.*]], label %[[M:.*]] +; CGSCC: [[T]]: +; CGSCC-NEXT: br label %[[M]] +; CGSCC: [[M]]: ; CGSCC-NEXT: [[ARMW:%.*]] = atomicrmw add ptr @GC, i32 [[I]] monotonic, align 4 ; CGSCC-NEXT: ret void ; @@ -3123,24 +3123,24 @@ m: define i32 @recSimplify(i32 %v, i1 %cond) { ; TUNIT: Function Attrs: nofree nosync nounwind -; TUNIT-LABEL: define {{[^@]+}}@recSimplify -; TUNIT-SAME: (i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR14]] { -; TUNIT-NEXT: br i1 [[COND]], label [[REC:%.*]], label [[COMP:%.*]] -; TUNIT: rec: +; TUNIT-LABEL: define i32 @recSimplify( +; TUNIT-SAME: i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR14]] { +; TUNIT-NEXT: br i1 [[COND]], label %[[REC:.*]], label %[[COMP:.*]] +; TUNIT: [[REC]]: ; TUNIT-NEXT: [[RV:%.*]] = call i32 @recSimplify(i32 undef, i1 noundef false) #[[ATTR14]] ; TUNIT-NEXT: ret i32 1 -; TUNIT: comp: +; TUNIT: [[COMP]]: ; TUNIT-NEXT: store i32 1, ptr @GRS2, align 4 ; TUNIT-NEXT: ret i32 1 ; ; CGSCC: Function Attrs: nofree nosync nounwind -; CGSCC-LABEL: define {{[^@]+}}@recSimplify -; CGSCC-SAME: (i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR17]] { -; CGSCC-NEXT: br i1 [[COND]], label [[REC:%.*]], label [[COMP:%.*]] -; CGSCC: rec: +; CGSCC-LABEL: define i32 @recSimplify( +; CGSCC-SAME: i32 [[V:%.*]], i1 noundef [[COND:%.*]]) 
#[[ATTR17]] { +; CGSCC-NEXT: br i1 [[COND]], label %[[REC:.*]], label %[[COMP:.*]] +; CGSCC: [[REC]]: ; CGSCC-NEXT: [[RV:%.*]] = call i32 @recSimplify(i32 [[V]], i1 noundef false) #[[ATTR17]] ; CGSCC-NEXT: ret i32 [[RV]] -; CGSCC: comp: +; CGSCC: [[COMP]]: ; CGSCC-NEXT: store i32 [[V]], ptr @GRS, align 4 ; CGSCC-NEXT: store i32 1, ptr @GRS2, align 4 ; CGSCC-NEXT: [[L:%.*]] = load i32, ptr @GRS, align 4 @@ -3167,8 +3167,8 @@ comp: define internal i32 @recSimplify2() { ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) -; CGSCC-LABEL: define {{[^@]+}}@recSimplify2 -; CGSCC-SAME: () #[[ATTR7]] { +; CGSCC-LABEL: define internal i32 @recSimplify2( +; CGSCC-SAME: ) #[[ATTR7]] { ; CGSCC-NEXT: [[R:%.*]] = load i32, ptr @GRS, align 4 ; CGSCC-NEXT: ret i32 [[R]] ; @@ -3179,18 +3179,18 @@ define internal i32 @recSimplify2() { ; Verify we do not return 10. define i32 @may_access_after_return(i32 noundef %N, i32 noundef %M) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return -; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define noundef i32 @may_access_after_return( +; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR18]] ; TUNIT-NEXT: ret i32 8 ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return -; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define i32 
@may_access_after_return( +; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR21]] @@ -3213,9 +3213,9 @@ entry: define internal void @write_both(ptr noundef %Q, ptr noundef %R) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) -; CHECK-LABEL: define {{[^@]+}}@write_both -; CHECK-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Q:%.*]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define internal void @write_both( +; CHECK-SAME: ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Q:%.*]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[R:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: store i32 3, ptr [[Q]], align 4 ; CHECK-NEXT: store i32 5, ptr [[R]], align 4 ; CHECK-NEXT: ret void @@ -3228,9 +3228,9 @@ entry: define internal ptr @passthrough(ptr noundef %P) { ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@passthrough -; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough( +; CGSCC-SAME: ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; 
CGSCC-NEXT: ret ptr [[P]] ; entry: @@ -3240,9 +3240,9 @@ entry: ; Verify we do not return 10. define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_choice -; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR4]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define noundef i32 @may_access_after_return_choice( +; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR4]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[B]]) #[[ATTR23:[0-9]+]] @@ -3254,9 +3254,9 @@ define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c ; TUNIT-NEXT: ret i32 [[ADD]] ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_choice -; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define i32 @may_access_after_return_choice( +; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR3]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) [[B]]) #[[ATTR28:[0-9]+]] @@ -3281,9 
+3281,9 @@ entry: define internal ptr @passthrough_choice(i1 %c, ptr noundef %P, ptr noundef %Q) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@passthrough_choice -; CHECK-SAME: (i1 [[C:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_choice( +; CHECK-SAME: i1 [[C:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], ptr [[P]], ptr [[Q]] ; CHECK-NEXT: ret ptr [[R]] ; @@ -3295,18 +3295,18 @@ entry: ; Verify we do not return 10. 
define i32 @may_access_after_return_no_choice1(i32 noundef %N, i32 noundef %M) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_no_choice1 -; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define noundef i32 @may_access_after_return_no_choice1( +; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR18]] ; TUNIT-NEXT: ret i32 8 ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice1 -; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define i32 @may_access_after_return_no_choice1( +; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR21]] @@ -3330,18 +3330,18 @@ entry: ; Verify we do not return 10. 
define i32 @may_access_after_return_no_choice2(i32 noundef %N, i32 noundef %M) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_no_choice2 -; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] { -; TUNIT-NEXT: entry: +; TUNIT-LABEL: define noundef i32 @may_access_after_return_no_choice2( +; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] { +; TUNIT-NEXT: [[ENTRY:.*:]] ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]]) #[[ATTR18]] ; TUNIT-NEXT: ret i32 8 ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice2 -; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define i32 @may_access_after_return_no_choice2( +; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]]) #[[ATTR21]] @@ -3364,9 +3364,9 @@ entry: define internal ptr @passthrough_no_choice_true(i1 %c, ptr noundef %P, ptr noundef %Q) { ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@passthrough_no_choice_true -; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" 
[[P:%.*]], i32 [[TMP0:%.*]]) #[[ATTR4]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_no_choice_true( +; CGSCC-SAME: ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], i32 [[TMP0:%.*]]) #[[ATTR4]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[Q_PRIV:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 [[TMP0]], ptr [[Q_PRIV]], align 4 ; CGSCC-NEXT: ret ptr [[P]] @@ -3377,9 +3377,9 @@ entry: } define internal ptr @passthrough_no_choice_false(i1 %c, ptr noundef %P, ptr noundef %Q) { ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@passthrough_no_choice_false -; CGSCC-SAME: (i32 [[TMP0:%.*]], ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] { -; CGSCC-NEXT: entry: +; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_no_choice_false( +; CGSCC-SAME: i32 [[TMP0:%.*]], ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] { +; CGSCC-NEXT: [[ENTRY:.*:]] ; CGSCC-NEXT: [[P_PRIV:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 [[TMP0]], ptr [[P_PRIV]], align 4 ; CGSCC-NEXT: ret ptr [[Q]] @@ -3391,8 +3391,8 @@ entry: define ptr @move2(ptr %p) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@move2 -; CHECK-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { +; CHECK-LABEL: define ptr @move2( +; CHECK-SAME: ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; CHECK-NEXT: ret ptr [[G]] ; @@ -3401,8 +3401,8 @@ define ptr @move2(ptr %p) { } define internal ptr @move4(ptr %p) { ; CHECK: 
Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@move4 -; CHECK-SAME: (ptr noalias nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { +; CHECK-LABEL: define internal ptr @move4( +; CHECK-SAME: ptr noalias nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i32 4 ; CHECK-NEXT: ret ptr [[G]] ; @@ -3412,20 +3412,20 @@ define internal ptr @move4(ptr %p) { define ptr @move246(i32 %i, ptr %p) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@move246 -; CHECK-SAME: (i32 [[I:%.*]], ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { +; CHECK-LABEL: define ptr @move246( +; CHECK-SAME: i32 [[I:%.*]], ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[I]], 0 -; CHECK-NEXT: br i1 [[C0]], label [[BG2:%.*]], label [[BG46:%.*]] -; CHECK: bg2: +; CHECK-NEXT: br i1 [[C0]], label %[[BG2:.*]], label %[[BG46:.*]] +; CHECK: [[BG2]]: ; CHECK-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; CHECK-NEXT: ret ptr [[G2]] -; CHECK: bg46: +; CHECK: [[BG46]]: ; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[I]], 1 -; CHECK-NEXT: br i1 [[C1]], label [[BG4:%.*]], label [[BG6:%.*]] -; CHECK: bg4: +; CHECK-NEXT: br i1 [[C1]], label %[[BG4:.*]], label %[[BG6:.*]] +; CHECK: [[BG4]]: ; CHECK-NEXT: [[G4:%.*]] = getelementptr i8, ptr [[P]], i32 4 ; CHECK-NEXT: ret ptr [[G4]] -; CHECK: bg6: +; CHECK: [[BG6]]: ; CHECK-NEXT: [[G6:%.*]] = getelementptr i8, ptr [[P]], i32 6 ; CHECK-NEXT: ret ptr [[G6]] ; @@ -3448,7 +3448,7 @@ bg6: declare void @use3i8(i8, i8, i8) define void @returnedPtrAccesses() { -; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccesses() { +; TUNIT-LABEL: define void @returnedPtrAccesses() { ; TUNIT-NEXT: [[A:%.*]] = alloca i64, align 8 ; TUNIT-NEXT: [[A2:%.*]] = call ptr 
@move2(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]] ; TUNIT-NEXT: [[A4:%.*]] = call ptr @move4(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]] @@ -3459,7 +3459,7 @@ define void @returnedPtrAccesses() { ; TUNIT-NEXT: call void @use3i8(i8 2, i8 4, i8 6) ; TUNIT-NEXT: ret void ; -; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccesses() { +; CGSCC-LABEL: define void @returnedPtrAccesses() { ; CGSCC-NEXT: [[A:%.*]] = alloca i64, align 8 ; CGSCC-NEXT: [[A2:%.*]] = call nonnull dereferenceable(1) ptr @move2(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]] ; CGSCC-NEXT: [[A4:%.*]] = call ptr @move4(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]] @@ -3494,16 +3494,16 @@ define void @returnedPtrAccesses() { } define void @returnedPtrAccessesMultiple(i32 %i) { -; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple -; TUNIT-SAME: (i32 [[I:%.*]]) { +; TUNIT-LABEL: define void @returnedPtrAccessesMultiple( +; TUNIT-SAME: i32 [[I:%.*]]) { ; TUNIT-NEXT: [[A:%.*]] = alloca i64, align 8 ; TUNIT-NEXT: [[AP:%.*]] = call ptr @move246(i32 [[I]], ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]] ; TUNIT-NEXT: store i8 2, ptr [[AP]], align 1 ; TUNIT-NEXT: call void @use3i8(i8 2, i8 2, i8 2) ; TUNIT-NEXT: ret void ; -; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple -; CGSCC-SAME: (i32 [[I:%.*]]) { +; CGSCC-LABEL: define void @returnedPtrAccessesMultiple( +; CGSCC-SAME: i32 [[I:%.*]]) { ; CGSCC-NEXT: [[A:%.*]] = alloca i64, align 8 ; CGSCC-NEXT: [[AP:%.*]] = call ptr @move246(i32 [[I]], ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]] ; CGSCC-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2 @@ -3530,8 +3530,8 @@ define void 
@returnedPtrAccessesMultiple(i32 %i) { } define void @returnedPtrAccessesMultiple2(i32 %i) { -; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple2 -; TUNIT-SAME: (i32 [[I:%.*]]) { +; TUNIT-LABEL: define void @returnedPtrAccessesMultiple2( +; TUNIT-SAME: i32 [[I:%.*]]) { ; TUNIT-NEXT: [[A:%.*]] = alloca i64, align 8 ; TUNIT-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2 ; TUNIT-NEXT: [[G4:%.*]] = getelementptr i8, ptr [[A]], i32 4 @@ -3547,8 +3547,8 @@ define void @returnedPtrAccessesMultiple2(i32 %i) { ; TUNIT-NEXT: call void @use3i8(i8 noundef [[L2]], i8 noundef [[L4]], i8 noundef [[L6]]) ; TUNIT-NEXT: ret void ; -; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple2 -; CGSCC-SAME: (i32 [[I:%.*]]) { +; CGSCC-LABEL: define void @returnedPtrAccessesMultiple2( +; CGSCC-SAME: i32 [[I:%.*]]) { ; CGSCC-NEXT: [[A:%.*]] = alloca i64, align 8 ; CGSCC-NEXT: [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2 ; CGSCC-NEXT: [[G4:%.*]] = getelementptr i8, ptr [[A]], i32 4 @@ -3677,57 +3677,58 @@ declare void @llvm.assume(i1 noundef) ; TUNIT: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1} ; TUNIT: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; TUNIT: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; TUNIT: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} ; TUNIT: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} ; TUNIT: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} ; TUNIT: [[META6]] = !{!"Simple C/C++ TBAA"} -; TUNIT: [[TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12} +; TUNIT: [[FLOAT_TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12} ; TUNIT: [[META8]] = !{!"S", [[META4]], i64 0, [[META4]], i64 4, [[META4]], i64 8, [[META9]], i64 12, [[META9]], i64 16, [[META9]], i64 20} ; TUNIT: [[META9]] = !{!"float", [[META5]], i64 0} -; TUNIT: [[TBAA10]] = !{[[META8]], [[META9]], i64 16} -; TUNIT: [[TBAA11]] = !{[[META8]], [[META9]], i64 20} -; TUNIT: [[TBAA12]] 
= !{[[META8]], [[META4]], i64 0} -; TUNIT: [[TBAA13]] = !{[[META8]], [[META4]], i64 4} -; TUNIT: [[TBAA14]] = !{[[META8]], [[META4]], i64 8} +; TUNIT: [[FLOAT_TBAA10]] = !{[[META8]], [[META9]], i64 16} +; TUNIT: [[FLOAT_TBAA11]] = !{[[META8]], [[META9]], i64 20} +; TUNIT: [[INT_TBAA12]] = !{[[META8]], [[META4]], i64 0} +; TUNIT: [[INT_TBAA13]] = !{[[META8]], [[META4]], i64 4} +; TUNIT: [[INT_TBAA14]] = !{[[META8]], [[META4]], i64 8} ; TUNIT: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]]} ; TUNIT: [[META16]] = !{!"llvm.loop.mustprogress"} ; TUNIT: [[LOOP17]] = distinct !{[[LOOP17]], [[META16]]} ; TUNIT: [[LOOP18]] = distinct !{[[LOOP18]], [[META16]]} -; TUNIT: [[TBAA19]] = !{[[META5]], [[META5]], i64 0} +; TUNIT: [[CHAR_TBAA19]] = !{[[META5]], [[META5]], i64 0} ; TUNIT: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]]} ; TUNIT: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]} ; TUNIT: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]]} ; TUNIT: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]]} ; TUNIT: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]} ; TUNIT: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]]} -; TUNIT: [[TBAA26]] = !{[[META9]], [[META9]], i64 0} +; TUNIT: [[FLOAT_TBAA26]] = !{[[META9]], [[META9]], i64 0} ; TUNIT: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]]} -; TUNIT: [[TBAA28]] = !{[[META29:![0-9]+]], [[META29]], i64 0} +; TUNIT: [[LONG_LONG_TBAA28]] = !{[[META29:![0-9]+]], [[META29]], i64 0} ; TUNIT: [[META29]] = !{!"long long", [[META5]], i64 0} ; TUNIT: [[LOOP30]] = distinct !{[[LOOP30]], [[META16]]} ; TUNIT: [[LOOP31]] = distinct !{[[LOOP31]], [[META16]]} +; TUNIT: [[META32]] = !{} ;. 
; CGSCC: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1} ; CGSCC: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; CGSCC: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CGSCC: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} ; CGSCC: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} ; CGSCC: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} ; CGSCC: [[META6]] = !{!"Simple C/C++ TBAA"} -; CGSCC: [[TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12} +; CGSCC: [[FLOAT_TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12} ; CGSCC: [[META8]] = !{!"S", [[META4]], i64 0, [[META4]], i64 4, [[META4]], i64 8, [[META9]], i64 12, [[META9]], i64 16, [[META9]], i64 20} ; CGSCC: [[META9]] = !{!"float", [[META5]], i64 0} -; CGSCC: [[TBAA10]] = !{[[META8]], [[META9]], i64 16} -; CGSCC: [[TBAA11]] = !{[[META8]], [[META9]], i64 20} -; CGSCC: [[TBAA12]] = !{[[META8]], [[META4]], i64 0} -; CGSCC: [[TBAA13]] = !{[[META8]], [[META4]], i64 4} -; CGSCC: [[TBAA14]] = !{[[META8]], [[META4]], i64 8} -; CGSCC: [[TBAA15]] = !{[[META5]], [[META5]], i64 0} +; CGSCC: [[FLOAT_TBAA10]] = !{[[META8]], [[META9]], i64 16} +; CGSCC: [[FLOAT_TBAA11]] = !{[[META8]], [[META9]], i64 20} +; CGSCC: [[INT_TBAA12]] = !{[[META8]], [[META4]], i64 0} +; CGSCC: [[INT_TBAA13]] = !{[[META8]], [[META4]], i64 4} +; CGSCC: [[INT_TBAA14]] = !{[[META8]], [[META4]], i64 8} +; CGSCC: [[CHAR_TBAA15]] = !{[[META5]], [[META5]], i64 0} ; CGSCC: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]]} ; CGSCC: [[META17]] = !{!"llvm.loop.mustprogress"} -; CGSCC: [[TBAA18]] = !{[[META9]], [[META9]], i64 0} +; CGSCC: [[FLOAT_TBAA18]] = !{[[META9]], [[META9]], i64 0} ; CGSCC: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]]} -; CGSCC: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; CGSCC: [[LONG_LONG_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; CGSCC: [[META21]] = !{!"long long", [[META5]], i64 0} ; CGSCC: [[LOOP22]] = 
distinct !{[[LOOP22]], [[META17]]} ; CGSCC: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]]} @@ -3739,4 +3740,5 @@ declare void @llvm.assume(i1 noundef) ; CGSCC: [[LOOP29]] = distinct !{[[LOOP29]], [[META17]]} ; CGSCC: [[LOOP30]] = distinct !{[[LOOP30]], [[META17]]} ; CGSCC: [[LOOP31]] = distinct !{[[LOOP31]], [[META17]]} +; CGSCC: [[META32]] = !{} ;. diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll b/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll index 9dba73a1beb77..7348df38d4de8 100644 --- a/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll +++ b/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -o - -passes=gvn %s | FileCheck %s --check-prefixes=CHECK,MDEP ; RUN: opt -S -o - -passes='gvn' %s | FileCheck %s --check-prefixes=CHECK,MSSA @@ -13,32 +13,33 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; Check that GVN doesn't determine %2 is partially redundant. 
define i32 @volatile_load(i32 %n) { -; CHECK-LABEL: @volatile_load( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a2, align 8, !tbaa [[TBAA5:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[TBAA5]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[S_09:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[P_08:%.*]] = phi ptr [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[P_08]], align 4, !tbaa [[TBAA9:![0-9]+]] +; CHECK-LABEL: define i32 @volatile_load( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a2, align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !tbaa [[ANYPTR_TBAA5]] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[S_09:%.*]] = phi i32 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[P_08:%.*]] = phi ptr [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[P_08]], align 4, !tbaa [[INT_TBAA9:![0-9]+]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA9]] -; CHECK-NEXT: [[TMP3:%.*]] = load 
volatile i32, ptr [[P_08]], align 4, !tbaa [[TBAA9]] +; CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA9]] +; CHECK-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[P_08]], align 4, !tbaa [[INT_TBAA9]] ; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP3]], [[S_09]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[P_08]], i64 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] -; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: -; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_FOR_END_CRIT_EDGE:.*]] +; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE]]: +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: ret i32 [[S_0_LCSSA]] ; entry: @@ -78,22 +79,23 @@ for.end: ; But we should not widen %0 to 64-bit load. 
define i32 @overaligned_load(i32 %a, ptr nocapture %b) !dbg !13 { -; CHECK-LABEL: @overaligned_load( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0, !dbg [[DBG14:![0-9]+]] -; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !dbg [[DBG14]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @s1, align 8, !dbg [[DBG15:![0-9]+]], !tbaa [[TBAA9]] -; CHECK-NEXT: br label [[IF_END:%.*]], !dbg [[DBG15]] -; CHECK: if.else: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 2, !dbg [[DBG16:![0-9]+]] -; CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4, !dbg [[DBG16]], !tbaa [[TBAA9]] -; CHECK-NEXT: br label [[IF_END]], !dbg [[DBG16]] -; CHECK: if.end: -; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ 0, [[IF_ELSE]] ] -; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ @s1, [[IF_THEN]] ], [ [[B]], [[IF_ELSE]] ] +; CHECK-LABEL: define i32 @overaligned_load( +; CHECK-SAME: i32 [[A:%.*]], ptr captures(none) [[B:%.*]]) !dbg [[DBG11:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 0, !dbg [[DBG14:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]], !dbg [[DBG14]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @s1, align 8, !dbg [[DBG15:![0-9]+]], !tbaa [[INT_TBAA9]] +; CHECK-NEXT: br label %[[IF_END:.*]], !dbg [[DBG15]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 2, !dbg [[DBG16:![0-9]+]] +; CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4, !dbg [[DBG16]], !tbaa [[INT_TBAA9]] +; CHECK-NEXT: br label %[[IF_END]], !dbg [[DBG16]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[TMP0]], %[[IF_THEN]] ], [ 0, %[[IF_ELSE]] ] +; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ @s1, %[[IF_THEN]] ], [ [[B]], %[[IF_ELSE]] ] ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 1, !dbg [[DBG17:![0-9]+]] -; CHECK-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !dbg [[DBG17]], !tbaa [[TBAA9]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !dbg [[DBG17]], !tbaa [[INT_TBAA9]] ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], [[I_0]], !dbg [[DBG17]] ; CHECK-NEXT: ret i32 [[ADD1]], !dbg [[DBG17]] ; @@ -144,6 +146,23 @@ if.end: file: !12, isOptimized: true, flags: "-O2", splitDebugFilename: "abc.debug", emissionKind: 2) +;. +; CHECK: [[META3:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META4:![0-9]+]], producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 0, splitDebugFilename: "abc.debug", emissionKind: LineTablesOnly) +; CHECK: [[META4]] = !DIFile(filename: "{{.*}}test.cpp", directory: {{.*}}) +; CHECK: [[ANYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"any pointer", [[META7:![0-9]+]], i64 0} +; CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +; CHECK: [[META8]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[INT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +; CHECK: [[META10]] = !{!"int", [[META7]], i64 0} +; CHECK: [[DBG11]] = distinct !DISubprogram(name: "test", scope: [[META4]], file: [[META4]], line: 99, type: [[META12:![0-9]+]], scopeLine: 100, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META3]], retainedNodes: [[META13:![0-9]+]]) +; CHECK: [[META12]] = !DISubroutineType(types: [[META13]]) +; CHECK: [[META13]] = !{} +; CHECK: [[DBG14]] = !DILocation(line: 100, column: 1, scope: [[DBG11]]) +; CHECK: [[DBG15]] = !DILocation(line: 101, column: 1, scope: [[DBG11]]) +; CHECK: [[DBG16]] = !DILocation(line: 102, column: 1, scope: [[DBG11]]) +; CHECK: [[DBG17]] = !DILocation(line: 103, column: 1, scope: [[DBG11]]) +;. ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; MDEP: {{.*}} ; MSSA: {{.*}} diff --git a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll index abbb17f11f436..49ee089fed393 100644 --- a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll +++ b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=gvn -S < %s | FileCheck %s --check-prefixes=CHECK,MDEP ; RUN: opt -passes='gvn' -S < %s | FileCheck %s --check-prefixes=CHECK,MSSA @@ -12,12 +12,12 @@ define void @test(ptr %P, ptr %Q, i1 %arg) nounwind { ; MDEP-NEXT: [[ENTRY:.*:]] ; MDEP-NEXT: br i1 [[ARG]], label %[[BB_NPH:.*]], label %[[FOR_END:.*]] ; MDEP: [[BB_NPH]]: -; MDEP-NEXT: [[TMP33_PRE:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[TBAA0:![0-9]+]] +; MDEP-NEXT: [[TMP33_PRE:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0:![0-9]+]] ; MDEP-NEXT: br label %[[FOR_BODY:.*]] ; MDEP: [[FOR_BODY]]: ; MDEP-NEXT: [[TMP33:%.*]] = phi i16 [ 0, %[[FOR_BODY]] ], [ [[TMP33_PRE]], %[[BB_NPH]] ] ; MDEP-NEXT: store i16 [[TMP33]], ptr [[Q]], align 2 -; MDEP-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[TBAA0]] +; MDEP-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0]] ; MDEP-NEXT: br i1 false, label %[[FOR_BODY_FOR_END_CRIT_EDGE:.*]], label %[[FOR_BODY]] ; MDEP: [[FOR_BODY_FOR_END_CRIT_EDGE]]: ; MDEP-NEXT: br label %[[FOR_END]] @@ -31,9 +31,9 @@ define void @test(ptr %P, ptr %Q, i1 %arg) nounwind { ; MSSA: [[BB_NPH]]: ; MSSA-NEXT: br label %[[FOR_BODY:.*]] ; MSSA: [[FOR_BODY]]: -; MSSA-NEXT: [[TMP33:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[TBAA0:![0-9]+]] +; MSSA-NEXT: [[TMP33:%.*]] = load i16, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0:![0-9]+]] ; MSSA-NEXT: store i16 [[TMP33]], ptr [[Q]], align 2 -; MSSA-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[TBAA0]] +; 
MSSA-NEXT: store i16 0, ptr [[P]], align 2, !tbaa [[SHORT_TBAA0]] ; MSSA-NEXT: br i1 false, label %[[FOR_BODY_FOR_END_CRIT_EDGE:.*]], label %[[FOR_BODY]] ; MSSA: [[FOR_BODY_FOR_END_CRIT_EDGE]]: ; MSSA-NEXT: br label %[[FOR_END]] @@ -62,12 +62,12 @@ for.end: ; preds = %for.body, %entry !2 = !{!"Simple C/C++ TBAA"} !3 = !{!"short", !1} ;. -; MDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; MDEP: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; MDEP: [[META1]] = !{!"short", [[META2:![0-9]+]]} ; MDEP: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} ; MDEP: [[META3]] = !{!"Simple C/C++ TBAA"} ;. -; MSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; MSSA: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; MSSA: [[META1]] = !{!"short", [[META2:![0-9]+]]} ; MSSA: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} ; MSSA: [[META3]] = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/GVN/pr33549.ll b/llvm/test/Transforms/GVN/pr33549.ll index e0d7712c6f5cc..a8ce37c4f86a6 100644 --- a/llvm/test/Transforms/GVN/pr33549.ll +++ b/llvm/test/Transforms/GVN/pr33549.ll @@ -1,41 +1,42 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=gvn -S < %s | FileCheck %s @Data = common local_unnamed_addr global [32 x i32] zeroinitializer, align 4 ; Function Attrs: norecurse nounwind define void @testshl() local_unnamed_addr #0 { -; CHECK-LABEL: @testshl( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[K_031:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC9:%.*]], [[FOR_INC8:%.*]] ] +; CHECK-LABEL: define void @testshl( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[K_031:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_INC8:.*]] 
] ; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[K_031]] ; CHECK-NEXT: [[SHR:%.*]] = ashr exact i32 [[SHL]], 1 ; CHECK-NEXT: [[CMP229:%.*]] = icmp slt i32 [[SHL]], 64 -; CHECK-NEXT: br i1 [[CMP229]], label [[FOR_BODY3_PREHEADER:%.*]], label [[FOR_INC8]] -; CHECK: for.body3.preheader: +; CHECK-NEXT: br i1 [[CMP229]], label %[[FOR_BODY3_PREHEADER:.*]], label %[[FOR_INC8]] +; CHECK: [[FOR_BODY3_PREHEADER]]: ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SHR]], 2 -; CHECK-NEXT: br label [[FOR_BODY3:%.*]] -; CHECK: for.body3: -; CHECK-NEXT: [[I_030:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY3]] ], [ [[DIV]], [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_BODY3:.*]] +; CHECK: [[FOR_BODY3]]: +; CHECK-NEXT: [[I_030:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY3]] ], [ [[DIV]], %[[FOR_BODY3_PREHEADER]] ] ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I_030]], [[SHR]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], ptr @Data, i32 0, i32 [[ADD]] ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [32 x i32], ptr @Data, i32 0, i32 [[I_030]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, [[TBAA3:!tbaa !.*]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, [[TBAA3]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], [[TMP0]] -; CHECK-NEXT: store i32 [[SUB]], ptr [[ARRAYIDX]], align 4, [[TBAA3]] +; CHECK-NEXT: store i32 [[SUB]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] -; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX4]], align 4, [[TBAA3]] +; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX4]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_030]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I_030]], 15 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label 
[[FOR_INC8]] -; CHECK: for.inc8: +; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY3]], label %[[FOR_INC8]] +; CHECK: [[FOR_INC8]]: ; CHECK-NEXT: [[INC9]] = add nuw nsw i32 [[K_031]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC9]], 8 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END10:%.*]], label [[FOR_BODY]] -; CHECK: for.end10: +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END10:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END10]]: ; CHECK-NEXT: ret void ; entry: @@ -89,3 +90,9 @@ attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="fa !4 = !{!"int", !5, i64 0} !5 = !{!"omnipotent char", !6, i64 0} !6 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +;. diff --git a/llvm/test/Transforms/GVN/pr64598.ll b/llvm/test/Transforms/GVN/pr64598.ll index 902af984bce2b..80a9198b41c50 100644 --- a/llvm/test/Transforms/GVN/pr64598.ll +++ b/llvm/test/Transforms/GVN/pr64598.ll @@ -1,61 +1,61 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=gvn < %s | FileCheck %s define i32 @main(i64 %x, ptr %d, ptr noalias %p) { -; CHECK-LABEL: define i32 @main -; CHECK-SAME: (i64 [[X:%.*]], ptr [[D:%.*]], ptr noalias [[P:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @main( +; CHECK-SAME: i64 [[X:%.*]], ptr [[D:%.*]], ptr noalias [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[T1_PRE_PRE_PRE:%.*]] = load ptr, ptr [[P]], align 8 -; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]] ; 
CHECK-NEXT: [[T3_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T2_PRE_PRE_PRE]], align 8 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[T2_PRE_PRE:%.*]] = phi ptr [ [[T2_PRE_PRE23:%.*]], [[LOOP_LATCH:%.*]] ], [ [[T2_PRE_PRE_PRE]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], [[LOOP_LATCH]] ], [ [[T1_PRE_PRE_PRE]], [[ENTRY]] ] -; CHECK-NEXT: br label [[LOOP2:%.*]] -; CHECK: loop2: -; CHECK-NEXT: [[T2_PRE_PRE25:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE:%.*]] ], [ [[T2_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: [[T3_PRE:%.*]] = phi ptr [ [[T3_PRE16:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T3_PRE_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ [[T2_PRE13:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: [[T1_PRE:%.*]] = phi ptr [ [[T1_PRE10:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: br label [[LOOP3:%.*]] -; CHECK: loop3: -; CHECK-NEXT: [[T2_PRE_PRE24:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP3_LATCH:%.*]] ], [ [[T2_PRE_PRE25]], [[LOOP2]] ] -; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP3_LATCH]] ], [ [[T1_PRE_PRE21]], [[LOOP2]] ] -; CHECK-NEXT: [[T3_PRE17:%.*]] = phi ptr [ [[T3_PRE16]], [[LOOP3_LATCH]] ], [ [[T3_PRE]], [[LOOP2]] ] -; CHECK-NEXT: [[T2_PRE14:%.*]] = phi ptr [ [[T2_PRE13]], [[LOOP3_LATCH]] ], [ [[T2_PRE]], [[LOOP2]] ] -; CHECK-NEXT: [[T1_PRE11:%.*]] = phi ptr [ [[T1_PRE10]], [[LOOP3_LATCH]] ], [ [[T1_PRE]], [[LOOP2]] ] -; CHECK-NEXT: [[T78:%.*]] = phi ptr [ [[T7:%.*]], [[LOOP3_LATCH]] ], [ [[T3_PRE]], [[LOOP2]] ] -; CHECK-NEXT: [[T66:%.*]] = phi ptr [ [[T6:%.*]], [[LOOP3_LATCH]] ], [ [[T2_PRE]], [[LOOP2]] ] -; CHECK-NEXT: [[T54:%.*]] = phi ptr [ [[T5:%.*]], [[LOOP3_LATCH]] ], [ [[T1_PRE]], [[LOOP2]] ] +; CHECK-NEXT: br label 
%[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[T2_PRE_PRE:%.*]] = phi ptr [ [[T2_PRE_PRE23:%.*]], %[[LOOP_LATCH:.*]] ], [ [[T2_PRE_PRE_PRE]], %[[ENTRY]] ] +; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], %[[LOOP_LATCH]] ], [ [[T1_PRE_PRE_PRE]], %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: [[T2_PRE_PRE25:%.*]] = phi ptr [ [[T2_PRE_PRE23]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE:.*]] ], [ [[T2_PRE_PRE]], %[[LOOP]] ] +; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = phi ptr [ [[T1_PRE_PRE19]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], %[[LOOP]] ] +; CHECK-NEXT: [[T3_PRE:%.*]] = phi ptr [ [[T3_PRE16:%.*]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T3_PRE_PRE_PRE]], %[[LOOP]] ] +; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ [[T2_PRE13:%.*]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE]], %[[LOOP]] ] +; CHECK-NEXT: [[T1_PRE:%.*]] = phi ptr [ [[T1_PRE10:%.*]], %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP3:.*]] +; CHECK: [[LOOP3]]: +; CHECK-NEXT: [[T2_PRE_PRE24:%.*]] = phi ptr [ [[T2_PRE_PRE23]], %[[LOOP3_LATCH:.*]] ], [ [[T2_PRE_PRE25]], %[[LOOP2]] ] +; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], %[[LOOP3_LATCH]] ], [ [[T1_PRE_PRE21]], %[[LOOP2]] ] +; CHECK-NEXT: [[T3_PRE17:%.*]] = phi ptr [ [[T3_PRE16]], %[[LOOP3_LATCH]] ], [ [[T3_PRE]], %[[LOOP2]] ] +; CHECK-NEXT: [[T2_PRE14:%.*]] = phi ptr [ [[T2_PRE13]], %[[LOOP3_LATCH]] ], [ [[T2_PRE]], %[[LOOP2]] ] +; CHECK-NEXT: [[T1_PRE11:%.*]] = phi ptr [ [[T1_PRE10]], %[[LOOP3_LATCH]] ], [ [[T1_PRE]], %[[LOOP2]] ] +; CHECK-NEXT: [[T78:%.*]] = phi ptr [ [[T7:%.*]], %[[LOOP3_LATCH]] ], [ [[T3_PRE]], %[[LOOP2]] ] +; CHECK-NEXT: [[T66:%.*]] = phi ptr [ [[T6:%.*]], %[[LOOP3_LATCH]] ], [ [[T2_PRE]], %[[LOOP2]] ] +; CHECK-NEXT: [[T54:%.*]] = phi ptr [ [[T5:%.*]], %[[LOOP3_LATCH]] ], [ [[T1_PRE]], %[[LOOP2]] ] ; CHECK-NEXT: [[TOBOOL_NOT2_I:%.*]] = icmp eq i64 [[X]], 0 -; CHECK-NEXT: br i1 false, label 
[[LOOP3_LOOP3_LATCH_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH_I:%.*]] -; CHECK: loop3.loop3.latch_crit_edge: -; CHECK-NEXT: br label [[LOOP3_LATCH]] -; CHECK: for.body.lr.ph.i: +; CHECK-NEXT: br i1 false, label %[[LOOP3_LOOP3_LATCH_CRIT_EDGE:.*]], label %[[FOR_BODY_LR_PH_I:.*]] +; CHECK: [[LOOP3_LOOP3_LATCH_CRIT_EDGE]]: +; CHECK-NEXT: br label %[[LOOP3_LATCH]] +; CHECK: [[FOR_BODY_LR_PH_I]]: ; CHECK-NEXT: store i32 0, ptr [[P]], align 4 ; CHECK-NEXT: [[T5_PRE:%.*]] = load ptr, ptr [[P]], align 8 -; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8, !tbaa [[ANYPTR_TBAA0]] ; CHECK-NEXT: [[T7_PRE:%.*]] = load ptr, ptr [[T6_PRE]], align 8 -; CHECK-NEXT: br label [[LOOP3_LATCH]] -; CHECK: loop3.latch: -; CHECK-NEXT: [[T2_PRE_PRE23]] = phi ptr [ [[T2_PRE_PRE24]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: [[T1_PRE_PRE19]] = phi ptr [ [[T1_PRE_PRE20]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: [[T3_PRE16]] = phi ptr [ [[T3_PRE17]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: [[T2_PRE13]] = phi ptr [ [[T2_PRE14]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: [[T1_PRE10]] = phi ptr [ [[T1_PRE11]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: [[T7]] = phi ptr [ [[T78]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: [[T6]] = phi ptr [ [[T66]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: [[T5]] = phi ptr [ [[T54]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ] -; CHECK-NEXT: br i1 false, label [[LOOP2_LATCH:%.*]], label [[LOOP3]] -; CHECK: loop2.latch: -; CHECK-NEXT: br i1 false, label [[LOOP2_LATCH_LOOP2_CRIT_EDGE]], label [[LOOP_LATCH]] -; CHECK: 
loop2.latch.loop2_crit_edge: -; CHECK-NEXT: br label [[LOOP2]] -; CHECK: loop.latch: -; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: br label [[LOOP]] +; CHECK-NEXT: br label %[[LOOP3_LATCH]] +; CHECK: [[LOOP3_LATCH]]: +; CHECK-NEXT: [[T2_PRE_PRE23]] = phi ptr [ [[T2_PRE_PRE24]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: [[T1_PRE_PRE19]] = phi ptr [ [[T1_PRE_PRE20]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: [[T3_PRE16]] = phi ptr [ [[T3_PRE17]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: [[T2_PRE13]] = phi ptr [ [[T2_PRE14]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: [[T1_PRE10]] = phi ptr [ [[T1_PRE11]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: [[T7]] = phi ptr [ [[T78]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: [[T6]] = phi ptr [ [[T66]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: [[T5]] = phi ptr [ [[T54]], %[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], %[[FOR_BODY_LR_PH_I]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP2_LATCH:.*]], label %[[LOOP3]] +; CHECK: [[LOOP2_LATCH]]: +; CHECK-NEXT: br i1 false, label %[[LOOP2_LATCH_LOOP2_CRIT_EDGE]], label %[[LOOP_LATCH]] +; CHECK: [[LOOP2_LATCH_LOOP2_CRIT_EDGE]]: +; CHECK-NEXT: br label %[[LOOP2]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] +; CHECK-NEXT: br label %[[LOOP]] ; entry: br label %loop @@ -101,3 +101,11 @@ loop.latch: !3 = !{!"Simple C/C++ TBAA"} !4 = !{!5, !5, i64 0} !5 = !{!"int", !2, i64 0} +;. 
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"int", [[META2]], i64 0} +;. diff --git a/llvm/test/Transforms/GVN/tbaa.ll b/llvm/test/Transforms/GVN/tbaa.ll index 59ace145b5657..bb9b0dea73ab1 100644 --- a/llvm/test/Transforms/GVN/tbaa.ll +++ b/llvm/test/Transforms/GVN/tbaa.ll @@ -1,17 +1,17 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=gvn -S < %s | FileCheck --check-prefixes=CHECK,MDEP %s ; RUN: opt -passes='gvn' -S < %s | FileCheck --check-prefixes=CHECK,MSSA %s define i32 @test1(ptr %p, ptr %q) { ; MDEP-LABEL: define i32 @test1( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0:![0-9]+]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test1( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0:![0-9]+]] ; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]) ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] @@ -25,14 +25,14 @@ define i32 @test1(ptr %p, ptr %q) { define i32 @test2(ptr %p, ptr %q) { ; MDEP-LABEL: define i32 @test2( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: 
ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test2( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] +; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -45,14 +45,14 @@ define i32 @test2(ptr %p, ptr %q) { define i32 @test3(ptr %p, ptr %q) { ; MDEP-LABEL: define i32 @test3( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4:![0-9]+]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test3( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4:![0-9]+]] +; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4]] ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -65,14 +65,14 @@ define i32 @test3(ptr %p, ptr %q) { define i32 @test4(ptr %p, ptr %q) { ; MDEP-LABEL: define i32 @test4( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6:![0-9]+]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test4( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6:![0-9]+]] +; 
MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -85,14 +85,14 @@ define i32 @test4(ptr %p, ptr %q) { define i32 @test5(ptr %p, ptr %q) { ; MDEP-LABEL: define i32 @test5( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test5( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] +; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6]] ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -105,14 +105,14 @@ define i32 @test5(ptr %p, ptr %q) { define i32 @test6(ptr %p, ptr %q) { ; MDEP-LABEL: define i32 @test6( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test6( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] +; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4]] ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -125,14 +125,14 @@ define i32 @test6(ptr %p, ptr %q) { define i32 @test7(ptr %p, ptr %q) { ; MDEP-LABEL: define i32 @test7( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa 
[[TBAA7:![0-9]+]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[SCALAR_TYPE_TBAA7:![0-9]+]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test7( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[SCALAR_TYPE_TBAA7:![0-9]+]] +; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4]] ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -150,9 +150,9 @@ define i32 @test8(ptr %p, ptr %q) { ; ; MSSA-LABEL: define i32 @test8( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10:![0-9]+]] +; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10:![0-9]+]] ; MSSA-NEXT: store i32 15, ptr [[P]], align 4 -; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]] +; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10]] ; MSSA-NEXT: [[C:%.*]] = sub i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -174,9 +174,9 @@ define i32 @test9(ptr %p, ptr %q) { ; ; MSSA-LABEL: define i32 @test9( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]] +; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10]] ; MSSA-NEXT: call void @clobber() -; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]] +; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[NODE_TBAA10]] ; MSSA-NEXT: [[C:%.*]] = sub i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -195,14 +195,14 @@ define i32 @test10(ptr %p, ptr %q) { ; and not just the common final access type. 
; MDEP-LABEL: define i32 @test10( ; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA10:![0-9]+]] +; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA10:![0-9]+]] ; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; MDEP-NEXT: ret i32 [[C]] ; ; MSSA-LABEL: define i32 @test10( ; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA13:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA17:![0-9]+]] +; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA13:![0-9]+]] +; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA17:![0-9]+]] ; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; MSSA-NEXT: ret i32 [[C]] ; @@ -238,39 +238,39 @@ declare i32 @foo(ptr) readonly !9 = !{!"yet another root"} !10 = !{!"node", !9, i64 1} ;. -; MDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; MDEP: [[C_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; MDEP: [[META1]] = !{!"C", [[META2:![0-9]+]]} ; MDEP: [[META2]] = !{!"A", [[META3:![0-9]+]]} ; MDEP: [[META3]] = !{!"tbaa root"} -; MDEP: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; MDEP: [[B_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} ; MDEP: [[META5]] = !{!"B", [[META2]]} -; MDEP: [[TBAA6]] = !{[[META2]], [[META2]], i64 0} -; MDEP: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; MDEP: [[A_TBAA6]] = !{[[META2]], [[META2]], i64 0} +; MDEP: [[SCALAR_TYPE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} ; MDEP: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]} ; MDEP: [[META9]] = !{!"another root"} -; MDEP: [[TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0} +; MDEP: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0} ; MDEP: [[META11]] = !{!"struct X", [[META12]], i64 0} ; MDEP: [[META12]] = !{!"int", [[META13:![0-9]+]], i64 0} ; MDEP: [[META13]] = !{!"char", [[META3]], i64 0} ;. 
-; MSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; MSSA: [[C_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; MSSA: [[META1]] = !{!"C", [[META2:![0-9]+]]} ; MSSA: [[META2]] = !{!"A", [[META3:![0-9]+]]} ; MSSA: [[META3]] = !{!"tbaa root"} -; MSSA: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; MSSA: [[B_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} ; MSSA: [[META5]] = !{!"B", [[META2]]} -; MSSA: [[TBAA6]] = !{[[META2]], [[META2]], i64 0} -; MSSA: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; MSSA: [[A_TBAA6]] = !{[[META2]], [[META2]], i64 0} +; MSSA: [[SCALAR_TYPE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} ; MSSA: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]} ; MSSA: [[META9]] = !{!"another root"} -; MSSA: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 1} +; MSSA: [[NODE_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 1} ; MSSA: [[META11]] = !{!"node", [[META12:![0-9]+]]} ; MSSA: [[META12]] = !{!"yet another root"} -; MSSA: [[TBAA13]] = !{[[META14:![0-9]+]], [[META15:![0-9]+]], i64 0} +; MSSA: [[INT_TBAA13]] = !{[[META14:![0-9]+]], [[META15:![0-9]+]], i64 0} ; MSSA: [[META14]] = !{!"struct X", [[META15]], i64 0} ; MSSA: [[META15]] = !{!"int", [[META16:![0-9]+]], i64 0} ; MSSA: [[META16]] = !{!"char", [[META3]], i64 0} -; MSSA: [[TBAA17]] = !{[[META18:![0-9]+]], [[META15]], i64 0} +; MSSA: [[INT_TBAA17]] = !{[[META18:![0-9]+]], [[META15]], i64 0} ; MSSA: [[META18]] = !{!"struct Y", [[META14]], i64 0} ;. ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: diff --git a/llvm/test/Transforms/GVNHoist/hoist-md.ll b/llvm/test/Transforms/GVNHoist/hoist-md.ll index 26fe475535add..2ef9bc30433c3 100644 --- a/llvm/test/Transforms/GVNHoist/hoist-md.ll +++ b/llvm/test/Transforms/GVNHoist/hoist-md.ll @@ -1,19 +1,19 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -S -passes=gvn-hoist < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @test1(i1 %b, ptr %x) { -; CHECK-LABEL: define void @test1 -; CHECK-SAME: (i1 [[B:%.*]], ptr [[X:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 2, ptr [[X]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: +; CHECK-LABEL: define void @test1( +; CHECK-SAME: i1 [[B:%.*]], ptr [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store i32 2, ptr [[X]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]] +; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: br label %[[IF_END:.*]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: ; CHECK-NEXT: ret void ; entry: @@ -32,19 +32,19 @@ if.end: ; preds = %if.else, %if.then } define void @test2(i1 %b, ptr %x) { -; CHECK-LABEL: define void @test2 -; CHECK-SAME: (i1 [[B:%.*]], ptr [[X:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test2( +; CHECK-SAME: i1 [[B:%.*]], ptr [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1 -; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br i1 
[[B]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: +; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1 -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: +; CHECK-NEXT: br label %[[IF_END:.*]] +; CHECK: [[IF_ELSE]]: ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: ; CHECK-NEXT: ret void ; entry: @@ -65,19 +65,19 @@ if.end: ; preds = %if.else, %if.then } define void @test3(i1 %b, ptr %x) { -; CHECK-LABEL: define void @test3 -; CHECK-SAME: (i1 [[B:%.*]], ptr [[X:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test3( +; CHECK-SAME: i1 [[B:%.*]], ptr [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[X]], i64 1 -; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: +; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1 -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: +; CHECK-NEXT: br label %[[IF_END:.*]] +; CHECK: [[IF_ELSE]]: ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[X]], i64 1 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: ; CHECK-NEXT: ret void ; entry: @@ -98,17 +98,17 @@ if.end: ; preds = %if.else, %if.then } define i32 @test4(i1 %b, ptr %y) { -; CHECK-LABEL: define i32 @test4 -; CHECK-SAME: (i1 [[B:%.*]], ptr [[Y:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test4( +; 
CHECK-SAME: i1 [[B:%.*]], ptr [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !range [[RNG3:![0-9]+]] -; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: if.end: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ [[TMP0]], [[IF_END]] ] +; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: br label %[[RETURN:.*]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP0]], %[[IF_END]] ] ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; entry: @@ -128,17 +128,17 @@ return: ; preds = %if.end, %if.then } define ptr @test5(i1 %b, ptr %y) { -; CHECK-LABEL: define ptr @test5 -; CHECK-SAME: (i1 [[B:%.*]], ptr [[Y:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-LABEL: define ptr @test5( +; CHECK-SAME: i1 [[B:%.*]], ptr [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y]], align 4 -; CHECK-NEXT: br i1 [[B]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: if.end: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[TMP0]], [[IF_THEN]] ], [ [[TMP0]], [[IF_END]] ] +; CHECK-NEXT: br i1 [[B]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: br label %[[RETURN:.*]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP0]], %[[IF_END]] ] ; CHECK-NEXT: ret ptr [[RETVAL_0]] ; entry: @@ -167,8 +167,8 @@ return: ; preds = %if.end, %if.then !8 = !{i32 3, i32 4} !9 = !{} ;. 
-; CHECK: [[TBAA0]] = !{!1, !1, i64 0} -; CHECK: [[META1:![0-9]+]] = !{!"omnipotent char", !2, i64 0} -; CHECK: [[META2:![0-9]+]] = !{!"Simple C++ TBAA"} +; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"Simple C++ TBAA"} ; CHECK: [[RNG3]] = !{i32 0, i32 2, i32 3, i32 4} ;. diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index 1c317786d1c20..ebc5c0d717c6d 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -1,10 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s define amdgpu_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { ; CHECK-LABEL: define amdgpu_kernel void @memset_group_to_flat( ; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], i8 4, i64 32, i1 false), !tbaa [[TBAA0:![0-9]+]], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], i8 4, i64 32, i1 false), !tbaa [[A_TBAA0:![0-9]+]], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]] ; CHECK-NEXT: ret void ; %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr @@ -15,7 +15,7 @@ define amdgpu_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 define amdgpu_kernel void @memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { ; CHECK-LABEL: define amdgpu_kernel void @memset_global_to_flat( ; CHECK-SAME: 
ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 [[GLOBAL_PTR]], i8 4, i64 32, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 [[GLOBAL_PTR]], i8 4, i64 32, i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr @@ -49,7 +49,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest, ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group( ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) -; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr @@ -61,7 +61,7 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr ; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group( ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) -; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr 
addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr @@ -73,7 +73,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrs ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group( ; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1) -; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr @@ -84,7 +84,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrs define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group( ; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa 
[[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr @@ -96,7 +96,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr a define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr addrspace(3) %dest.group.ptr, ptr addrspace(1) %src.global.ptr, i64 %size) #0 { ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global( ; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr addrspace(1) [[SRC_GLOBAL_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[SRC_GLOBAL_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[SRC_GLOBAL_PTR]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast.src = addrspacecast ptr addrspace(1) %src.global.ptr to ptr @@ -108,7 +108,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspace(1) %dest.global.ptr, ptr addrspace(3) %src.group.ptr, i32 %size) #0 { ; CHECK-LABEL: define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global( ; CHECK-SAME: ptr addrspace(1) [[DEST_GLOBAL_PTR:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i32 [[SIZE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 [[DEST_GLOBAL_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i32 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 [[DEST_GLOBAL_PTR]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i32 
[[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast.dest = addrspacecast ptr addrspace(1) %dest.global.ptr to ptr @@ -159,7 +159,7 @@ define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_n define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %group.ptr) #0 { ; CHECK-LABEL: define amdgpu_kernel void @memcpy_group_flat_to_flat_self( ; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], ptr addrspace(3) align 4 [[GROUP_PTR]], i64 32, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 [[GROUP_PTR]], ptr addrspace(3) align 4 [[GROUP_PTR]], i64 32, i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr @@ -170,7 +170,7 @@ define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest ; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group( ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) -; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] +; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[A_TBAA0]], !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: ret void ; %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr @@ -236,7 +236,7 @@ attributes #1 = { argmemonly nounwind } !7 = distinct !{!7, !5, !"some scope 2"} !8 = 
!{i64 0, i64 8, null} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[A_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"A", [[META2:![0-9]+]]} ; CHECK: [[META2]] = !{!"tbaa root"} ; CHECK: [[META3]] = !{[[META4:![0-9]+]]} diff --git a/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll index 08ce83b389786..ee3f2305f1a2c 100644 --- a/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll +++ b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=instcombine -instcombine-lower-dbg-declare=0 | FileCheck %s ; In this example, instcombine wants to turn "local" into an i64, since that's @@ -24,12 +24,13 @@ target triple = "x86_64-pc-windows-msvc19.11.25508" %struct.Foo = type { i32, i32 } define void @f(ptr %p) !dbg !11 { -; CHECK-LABEL: @f( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @f( +; CHECK-SAME: ptr [[P:%.*]]) !dbg [[DBG11:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: #dbg_declare(ptr [[LOCAL]], [[META22:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P:%.*]], align 8, !dbg [[DBG24:![0-9]+]], !tbaa [[TBAA25:![0-9]+]] -; CHECK-NEXT: store i64 [[TMP0]], ptr [[LOCAL]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA25]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !dbg [[DBG24:![0-9]+]], !tbaa [[LONG_LONG_TBAA25:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP0]], ptr [[LOCAL]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[LONG_LONG_TBAA25]] ; CHECK-NEXT: call void @escape(ptr nonnull [[LOCAL]]), !dbg [[DBG30:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG31:![0-9]+]] ; diff --git 
a/llvm/test/Transforms/InstCombine/load-no-aliasing.ll b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll index 67dfe9d6da265..a93892119056c 100644 --- a/llvm/test/Transforms/InstCombine/load-no-aliasing.ll +++ b/llvm/test/Transforms/InstCombine/load-no-aliasing.ll @@ -1,12 +1,13 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s ; Check that load to load forwarding works with non aliasing store inbetween. define i32 @test_load_store_load_combine(ptr, ptr) { -; CHECK-LABEL: @test_load_store_load_combine( -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-LABEL: define i32 @test_load_store_load_combine( +; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[A]] to float -; CHECK-NEXT: store float [[F]], ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: store float [[F]], ptr [[TMP1]], align 4, !tbaa [[FLOAT_TBAA4:![0-9]+]] ; CHECK-NEXT: ret i32 [[A]] ; %a = load i32, ptr %0, align 4, !tbaa !0 @@ -22,3 +23,11 @@ define i32 @test_load_store_load_combine(ptr, ptr) { !3 = !{!"Simple C++ TBAA"} !4 = !{!5, !5, i64 0} !5 = !{!"float", !2, i64 0} +;. +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +; CHECK: [[FLOAT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"float", [[META2]], i64 0} +;. 
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll index 859c9b892f156..761129979445c 100644 --- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll +++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll @@ -1,13 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" define i32 @test_load_cast_combine_tbaa(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA. -; CHECK-LABEL: @test_load_cast_combine_tbaa( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-LABEL: define i32 @test_load_cast_combine_tbaa( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !tbaa [[SCALAR_TYPE_TBAA0:![0-9]+]] ; CHECK-NEXT: ret i32 [[L1]] ; entry: @@ -18,9 +19,10 @@ entry: define i32 @test_load_cast_combine_noalias(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata. -; CHECK-LABEL: @test_load_cast_combine_noalias( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META3]] +; CHECK-LABEL: define i32 @test_load_cast_combine_noalias( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META3]] ; CHECK-NEXT: ret i32 [[L1]] ; entry: @@ -33,9 +35,10 @@ define float @test_load_cast_combine_range(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) drops range metadata. 
It ; would be nice to preserve or update it somehow but this is hard when moving ; between types. -; CHECK-LABEL: @test_load_cast_combine_range( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-LABEL: define float @test_load_cast_combine_range( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[PTR]], align 4 ; CHECK-NEXT: ret float [[L1]] ; entry: @@ -46,9 +49,10 @@ entry: define i32 @test_load_cast_combine_invariant(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata. -; CHECK-LABEL: @test_load_cast_combine_invariant( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !invariant.load [[META6:![0-9]+]] +; CHECK-LABEL: define i32 @test_load_cast_combine_invariant( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META6:![0-9]+]] ; CHECK-NEXT: ret i32 [[L1]] ; entry: @@ -60,9 +64,10 @@ entry: define i32 @test_load_cast_combine_nontemporal(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal ; metadata. -; CHECK-LABEL: @test_load_cast_combine_nontemporal( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !nontemporal [[META7:![0-9]+]] +; CHECK-LABEL: define i32 @test_load_cast_combine_nontemporal( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !nontemporal [[META7:![0-9]+]] ; CHECK-NEXT: ret i32 [[L1]] ; entry: @@ -74,9 +79,10 @@ entry: define ptr @test_load_cast_combine_align(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves align ; metadata. 
-; CHECK-LABEL: @test_load_cast_combine_align( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !align [[META8:![0-9]+]] +; CHECK-LABEL: define ptr @test_load_cast_combine_align( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR]], align 8, !align [[META8:![0-9]+]] ; CHECK-NEXT: ret ptr [[L]] ; entry: @@ -87,9 +93,10 @@ entry: define ptr @test_load_cast_combine_deref(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves dereferenceable ; metadata. -; CHECK-LABEL: @test_load_cast_combine_deref( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable [[META8]] +; CHECK-LABEL: define ptr @test_load_cast_combine_deref( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR]], align 8, !dereferenceable [[META8]] ; CHECK-NEXT: ret ptr [[L]] ; entry: @@ -100,9 +107,10 @@ entry: define ptr @test_load_cast_combine_deref_or_null(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves ; dereferenceable_or_null metadata. -; CHECK-LABEL: @test_load_cast_combine_deref_or_null( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable_or_null [[META8]] +; CHECK-LABEL: define ptr @test_load_cast_combine_deref_or_null( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR]], align 8, !dereferenceable_or_null [[META8]] ; CHECK-NEXT: ret ptr [[L]] ; entry: @@ -113,21 +121,22 @@ entry: define void @test_load_cast_combine_loop(ptr %src, ptr %dst, i32 %n) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access ; metadata. 
-; CHECK-LABEL: @test_load_cast_combine_loop( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-LABEL: define void @test_load_cast_combine_loop( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I]] to i64 -; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP1]] ; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[SRC_GEP]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] ; CHECK-NEXT: store i32 [[L1]], ptr [[DST_GEP]], align 4 ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP1:![0-9]+]] -; CHECK: exit: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP1:![0-9]+]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -149,9 +158,10 @@ exit: } define void @test_load_cast_combine_nonnull(ptr %ptr) { -; CHECK-LABEL: @test_load_cast_combine_nonnull( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !nonnull [[META6]] +; CHECK-LABEL: define void @test_load_cast_combine_nonnull( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[PTR]], align 8, !nonnull [[META6]] ; 
CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 336 ; CHECK-NEXT: store ptr [[P]], ptr [[GEP]], align 8 ; CHECK-NEXT: ret void @@ -164,8 +174,9 @@ entry: } define i32 @test_load_cast_combine_noundef(ptr %ptr) { -; CHECK-LABEL: @test_load_cast_combine_noundef( -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !noundef [[META6]] +; CHECK-LABEL: define i32 @test_load_cast_combine_noundef( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !noundef [[META6]] ; CHECK-NEXT: ret i32 [[L1]] ; %l = load float, ptr %ptr, !noundef !{} @@ -175,9 +186,10 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) { define i32 @test_load_cast_combine_noalias_addrspace(ptr %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA. -; CHECK-LABEL: @test_load_cast_combine_noalias_addrspace( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !noalias.addrspace [[META10:![0-9]+]] +; CHECK-LABEL: define i32 @test_load_cast_combine_noalias_addrspace( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !noalias.addrspace [[META10:![0-9]+]] ; CHECK-NEXT: ret i32 [[L1]] ; entry: @@ -188,11 +200,12 @@ entry: ; Preserve none-UB metadata on loads. 
define ptr @preserve_load_metadata_after_select_transform1(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) { -; CHECK-LABEL: @preserve_load_metadata_after_select_transform1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B:%.*]], align 1, !nonnull [[META6]], !align [[META8]] -; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A:%.*]], align 1, !nonnull [[META6]], !align [[META8]] -; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C:%.*]], ptr [[B_VAL]], ptr [[A_VAL]] +; CHECK-LABEL: define ptr @preserve_load_metadata_after_select_transform1( +; CHECK-SAME: i1 [[C:%.*]], ptr dereferenceable(8) [[A:%.*]], ptr dereferenceable(8) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B]], align 1, !nonnull [[META6]], !align [[META8]] +; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A]], align 1, !nonnull [[META6]], !align [[META8]] +; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C]], ptr [[B_VAL]], ptr [[A_VAL]] ; CHECK-NEXT: ret ptr [[L_SEL]] ; entry: @@ -203,11 +216,12 @@ entry: ; Preserve none-UB metadata on loads. 
define i32 @preserve_load_metadata_after_select_transform_range(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) { -; CHECK-LABEL: @preserve_load_metadata_after_select_transform_range( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B:%.*]], align 1, !range [[RNG10:![0-9]+]] -; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A:%.*]], align 1, !range [[RNG10]] -; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C:%.*]], i32 [[B_VAL]], i32 [[A_VAL]] +; CHECK-LABEL: define i32 @preserve_load_metadata_after_select_transform_range( +; CHECK-SAME: i1 [[C:%.*]], ptr dereferenceable(8) [[A:%.*]], ptr dereferenceable(8) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 1, !range [[RNG11:![0-9]+]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 1, !range [[RNG11]] +; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C]], i32 [[B_VAL]], i32 [[A_VAL]] ; CHECK-NEXT: ret i32 [[L_SEL]] ; entry: @@ -217,10 +231,11 @@ entry: } define double @preserve_load_metadata_after_select_transform2(ptr %a, ptr %b) { -; CHECK-LABEL: @preserve_load_metadata_after_select_transform2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]] -; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]] +; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform2( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]] +; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]] ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]] ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]] ; CHECK-NEXT: ret double [[L_SEL]] @@ -235,10 +250,11 @@ entry: } 
define double @preserve_load_metadata_after_select_transform_metadata_missing_1(ptr %a, ptr %b) { -; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]] -; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]] +; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_1( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !llvm.access.group [[META6]] +; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]] ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]] ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]] ; CHECK-NEXT: ret double [[L_SEL]] @@ -253,10 +269,11 @@ entry: } define double @preserve_load_metadata_after_select_transform_metadata_missing_2(ptr %a, ptr %b) { -; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]] -; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]] +; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_2( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !llvm.access.group [[META6]] +; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !llvm.access.group [[META6]] ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]] ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]] ; CHECK-NEXT: ret double [[L_SEL]] @@ -271,10 +288,11 @@ entry: } define double 
@preserve_load_metadata_after_select_transform_metadata_missing_3(ptr %a, ptr %b) { -; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_3( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]] -; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !llvm.access.group [[META6]] +; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_3( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]] +; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]] ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]] ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]] ; CHECK-NEXT: ret double [[L_SEL]] @@ -291,10 +309,11 @@ entry: ; Like preserve_load_metadata_after_select_transform_metadata_missing_3, but ; with different access groups on all loads. 
define double @preserve_load_metadata_after_select_transform_metadata_missing_4(ptr %a, ptr %b) { -; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_4( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META3]], !llvm.access.group [[META6]] -; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !alias.scope [[META11:![0-9]+]], !noalias [[META11]], !llvm.access.group [[ACC_GRP14:![0-9]+]] +; CHECK-LABEL: define double @preserve_load_metadata_after_select_transform_metadata_missing_4( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !alias.scope [[META3]], !noalias [[META3]], !llvm.access.group [[META6]] +; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !alias.scope [[META12:![0-9]+]], !noalias [[META12]], !llvm.access.group [[ACC_GRP15:![0-9]+]] ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]] ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]] ; CHECK-NEXT: ret double [[L_SEL]] @@ -327,7 +346,7 @@ entry: !16 = distinct !{!16} ;. 
-; CHECK: [[TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0} +; CHECK: [[SCALAR_TYPE_TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0} ; CHECK: [[LOOP1]] = !{!"scalar type", [[META2:![0-9]+]]} ; CHECK: [[META2]] = !{!"root"} ; CHECK: [[META3]] = !{[[META4:![0-9]+]]} @@ -337,9 +356,10 @@ entry: ; CHECK: [[META7]] = !{i32 1} ; CHECK: [[META8]] = !{i64 8} ; CHECK: [[ACC_GRP9]] = distinct !{} -; CHECK: [[RNG10]] = !{i32 0, i32 42} -; CHECK: [[META11]] = !{[[META12:![0-9]+]]} -; CHECK: [[META12]] = distinct !{[[META12]], [[META13:![0-9]+]]} -; CHECK: [[META13]] = distinct !{[[META13]]} -; CHECK: [[ACC_GRP14]] = distinct !{} +; CHECK: [[META10]] = !{i32 5, i32 6} +; CHECK: [[RNG11]] = !{i32 0, i32 42} +; CHECK: [[META12]] = !{[[META13:![0-9]+]]} +; CHECK: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]]} +; CHECK: [[META14]] = distinct !{[[META14]]} +; CHECK: [[ACC_GRP15]] = distinct !{} ;. diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll index 1a571100323ff..0832561e2b02b 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll @@ -1,12 +1,13 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=instcombine < %s | FileCheck %s @g0 = global <4 x i32> zeroinitializer, align 16 define inreg <4 x i32> @mload1(ptr nocapture readonly %a0) #0 { -; CHECK-LABEL: @mload1( -; CHECK-NEXT: b0: -; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr [[A0:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]] +; CHECK-LABEL: define inreg <4 x i32> @mload1( +; CHECK-SAME: ptr readonly captures(none) [[A0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[B0:.*:]] +; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr [[A0]], align 16, !tbaa [[CHAR_TBAA0:![0-9]+]] ; 
CHECK-NEXT: ret <4 x i32> [[UNMASKEDLOAD]] ; b0: @@ -15,9 +16,10 @@ b0: } define inreg <4 x i32> @mload2() #0 { -; CHECK-LABEL: @mload2( -; CHECK-NEXT: b0: -; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr @g0, align 16, !tbaa [[TBAA0]] +; CHECK-LABEL: define inreg <4 x i32> @mload2( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[B0:.*:]] +; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, ptr @g0, align 16, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[V01:%.*]] = insertelement <4 x i32> [[UNMASKEDLOAD]], i32 0, i64 0 ; CHECK-NEXT: ret <4 x i32> [[V01]] ; @@ -27,9 +29,10 @@ b0: } define void @mstore(<4 x i32> %a0, ptr nocapture readonly %a1) #0 { -; CHECK-LABEL: @mstore( -; CHECK-NEXT: b0: -; CHECK-NEXT: store <4 x i32> [[A0:%.*]], ptr [[A1:%.*]], align 16, !tbaa [[TBAA0]] +; CHECK-LABEL: define void @mstore( +; CHECK-SAME: <4 x i32> [[A0:%.*]], ptr readonly captures(none) [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:.*:]] +; CHECK-NEXT: store <4 x i32> [[A0]], ptr [[A1]], align 16, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: ret void ; b0: @@ -46,3 +49,8 @@ declare void @llvm.masked.store.v4i1.p0(<4 x i32>, ptr, i32, <4 x i1>) !1 = !{!"omnipotent char", !2, i64 0} !2 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll index e96452a3cebc8..6fc29bc2c9a28 100644 --- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll +++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -11,10 +11,11 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounw %struct.test1 = type { float } define void @test1(ptr nocapture %a, ptr nocapture %b) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: store i32 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]] +; CHECK-LABEL: define void @test1( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4, !tbaa [[FLOAT_TBAA0:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ -25,7 +26,7 @@ entry: %struct.test2 = type { ptr } define ptr @test2() { -; CHECK-LABEL: @test2( +; CHECK-LABEL: define ptr @test2() { ; CHECK-NEXT: store i1 true, ptr poison, align 1 ; CHECK-NEXT: ret ptr poison ; @@ -36,10 +37,11 @@ define ptr @test2() { } define void @test3_multiple_fields(ptr nocapture %a, ptr nocapture %b) { -; CHECK-LABEL: @test3_multiple_fields( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4 -; CHECK-NEXT: store i64 [[TMP0]], ptr [[A:%.*]], align 
4 +; CHECK-LABEL: define void @test3_multiple_fields( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B]], align 4 +; CHECK-NEXT: store i64 [[TMP0]], ptr [[A]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -48,10 +50,11 @@ entry: } define void @test4_multiple_copy_first_field(ptr nocapture %a, ptr nocapture %b) { -; CHECK-LABEL: @test4_multiple_copy_first_field( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]] +; CHECK-LABEL: define void @test4_multiple_copy_first_field( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4, !tbaa [[FLOAT_TBAA0]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ -60,10 +63,11 @@ entry: } define void @test5_multiple_copy_more_than_first_field(ptr nocapture %a, ptr nocapture %b) { -; CHECK-LABEL: @test5_multiple_copy_more_than_first_field( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[A:%.*]], align 4 +; CHECK-LABEL: define void @test5_multiple_copy_more_than_first_field( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -83,7 +87,7 @@ entry: ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. 
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[FLOAT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]]} ; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"} ;. diff --git a/llvm/test/Transforms/JumpThreading/ddt-crash3.ll b/llvm/test/Transforms/JumpThreading/ddt-crash3.ll index b37987bbf5cda..edaade329e9ce 100644 --- a/llvm/test/Transforms/JumpThreading/ddt-crash3.ll +++ b/llvm/test/Transforms/JumpThreading/ddt-crash3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=jump-threading -verify-dom-info < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -10,19 +10,20 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: norecurse noreturn nounwind uwtable define void @hoge() local_unnamed_addr #0 { -; CHECK-LABEL: @hoge( -; CHECK-NEXT: bb: -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: [[TMP:%.*]] = load i64, ptr @global, align 8, !tbaa [[TBAA1:![0-9]+]] +; CHECK-LABEL: define void @hoge( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[TMP:%.*]] = load i64, ptr @global, align 8, !tbaa [[LONG_TBAA1:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[BB26:%.*]], label [[BB3:%.*]] -; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @global.1, align 8, !tbaa [[TBAA1]] +; CHECK-NEXT: br i1 [[TMP2]], label %[[BB26:.*]], label %[[BB3:.*]] +; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @global.1, align 8, !tbaa [[LONG_TBAA1]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[BB26]], label [[BB26]] -; CHECK: bb26: -; CHECK-NEXT: br label [[BB1]] +; CHECK-NEXT: br i1 [[TMP5]], 
label %[[BB26]], label %[[BB26]] +; CHECK: [[BB26]]: +; CHECK-NEXT: br label %[[BB1]] ; bb: br label %bb1 @@ -56,3 +57,9 @@ attributes #0 = { norecurse noreturn nounwind uwtable "correctly-rounded-divide- !2 = !{!"long", !3, i64 0} !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[LONG_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"long", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +;. diff --git a/llvm/test/Transforms/JumpThreading/thread-loads.ll b/llvm/test/Transforms/JumpThreading/thread-loads.ll index 4749de0b248e8..cb10168547d2a 100644 --- a/llvm/test/Transforms/JumpThreading/thread-loads.ll +++ b/llvm/test/Transforms/JumpThreading/thread-loads.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=jump-threading -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes=jump-threading -S | FileCheck %s @@ -8,23 +8,24 @@ target triple = "i386-apple-darwin7" ; Test that we can thread through the block with the partially redundant load (%2). ; rdar://6402033 define i32 @test1(ptr %P) nounwind { -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 (...) @f1() #[[ATTR0:[0-9]+]] +; CHECK-LABEL: define i32 @test1( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 (...) 
@f1() #[[ATTR0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[BB1_THREAD:%.*]] -; CHECK: bb1.thread: -; CHECK-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb1: +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB1:.*]], label %[[BB1_THREAD:.*]] +; CHECK: [[BB1_THREAD]]: +; CHECK-NEXT: store i32 42, ptr [[P]], align 4 +; CHECK-NEXT: br label %[[BB3:.*]] +; CHECK: [[BB1]]: ; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[DOTPR]], 36 -; CHECK-NEXT: br i1 [[TMP2]], label [[BB3]], label [[BB2:%.*]] -; CHECK: bb2: +; CHECK-NEXT: br i1 [[TMP2]], label %[[BB3]], label %[[BB2:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 (...) @f2() #[[ATTR0]] ; CHECK-NEXT: ret i32 0 -; CHECK: bb3: -; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, [[BB1_THREAD]] ], [ 0, [[BB1]] ] +; CHECK: [[BB3]]: +; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, %[[BB1_THREAD]] ], [ 0, %[[BB1]] ] ; CHECK-NEXT: ret i32 [[RES_02]] ; entry: @@ -59,23 +60,24 @@ declare i32 @f2(...) ; rdar://11039258 define i32 @test2(ptr %P) nounwind { -; CHECK-LABEL: @test2( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test2( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 (...) 
@f1() #[[ATTR0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[BB1_THREAD:%.*]] -; CHECK: bb1.thread: -; CHECK-NEXT: store i32 42, ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb1: -; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB1:.*]], label %[[BB1_THREAD:.*]] +; CHECK: [[BB1_THREAD]]: +; CHECK-NEXT: store i32 42, ptr [[P]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] +; CHECK-NEXT: br label %[[BB3:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[DOTPR]], 36 -; CHECK-NEXT: br i1 [[TMP2]], label [[BB3]], label [[BB2:%.*]] -; CHECK: bb2: +; CHECK-NEXT: br i1 [[TMP2]], label %[[BB3]], label %[[BB2:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 (...) @f2() #[[ATTR0]] ; CHECK-NEXT: ret i32 0 -; CHECK: bb3: -; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, [[BB1_THREAD]] ], [ 0, [[BB1]] ] +; CHECK: [[BB3]]: +; CHECK-NEXT: [[RES_02:%.*]] = phi i32 [ 1, %[[BB1_THREAD]] ], [ 0, %[[BB1]] ] ; CHECK-NEXT: ret i32 [[RES_02]] ; entry: @@ -106,17 +108,18 @@ define i32 @test3(ptr %x, i1 %f) { ; as necessary in the predecessors. This is especially tricky because the same ; predecessor ends up with two entries in the PHI node and they must share ; a single cast. 
-; CHECK-LABEL: @test3( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X:%.*]], align 8 -; CHECK-NEXT: br i1 [[F:%.*]], label [[IF_END57:%.*]], label [[IF_END57]] -; CHECK: if.end57: -; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP1]], [[ENTRY]] ] +; CHECK-LABEL: define i32 @test3( +; CHECK-SAME: ptr [[X:%.*]], i1 [[F:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8 +; CHECK-NEXT: br i1 [[F]], label %[[IF_END57:.*]], label %[[IF_END57]] +; CHECK: [[IF_END57]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[ENTRY]] ] ; CHECK-NEXT: [[TOBOOL59:%.*]] = icmp eq ptr [[TMP3]], null -; CHECK-NEXT: br i1 [[TOBOOL59]], label [[RETURN:%.*]], label [[IF_THEN60:%.*]] -; CHECK: if.then60: +; CHECK-NEXT: br i1 [[TOBOOL59]], label %[[RETURN:.*]], label %[[IF_THEN60:.*]] +; CHECK: [[IF_THEN60]]: ; CHECK-NEXT: ret i32 42 -; CHECK: return: +; CHECK: [[RETURN]]: ; CHECK-NEXT: ret i32 13 ; entry: @@ -139,23 +142,24 @@ return: } define i32 @test4(ptr %P) { -; CHECK-LABEL: @test4( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test4( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) 
@f1() ; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0 -; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB1_THREAD:%.*]] -; CHECK: bb1.thread: -; CHECK-NEXT: store atomic i32 42, ptr [[P:%.*]] unordered, align 4 -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb1: +; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB1_THREAD:.*]] +; CHECK: [[BB1_THREAD]]: +; CHECK-NEXT: store atomic i32 42, ptr [[P]] unordered, align 4 +; CHECK-NEXT: br label %[[BB3:.*]] +; CHECK: [[BB1]]: ; CHECK-NEXT: [[V2_PR:%.*]] = load atomic i32, ptr [[P]] unordered, align 4 ; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2_PR]], 36 -; CHECK-NEXT: br i1 [[V3]], label [[BB3]], label [[BB2:%.*]] -; CHECK: bb2: +; CHECK-NEXT: br i1 [[V3]], label %[[BB3]], label %[[BB2:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2() ; CHECK-NEXT: ret i32 0 -; CHECK: bb3: -; CHECK-NEXT: [[RES_04:%.*]] = phi i32 [ 1, [[BB1_THREAD]] ], [ 0, [[BB1]] ] +; CHECK: [[BB3]]: +; CHECK-NEXT: [[RES_04:%.*]] = phi i32 [ 1, %[[BB1_THREAD]] ], [ 0, %[[BB1]] ] ; CHECK-NEXT: ret i32 [[RES_04]] ; entry: @@ -183,23 +187,24 @@ bb3: define i32 @test5(ptr %P) { ; Negative test -; CHECK-LABEL: @test5( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test5( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) 
@f1() ; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0 -; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB:%.*]] -; CHECK: bb: -; CHECK-NEXT: store atomic i32 42, ptr [[P:%.*]] release, align 4 -; CHECK-NEXT: br label [[BB1]] -; CHECK: bb1: -; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, [[BB]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB:.*]] +; CHECK: [[BB]]: +; CHECK-NEXT: store atomic i32 42, ptr [[P]] release, align 4 +; CHECK-NEXT: br label %[[BB1]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, %[[BB]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[V2:%.*]] = load atomic i32, ptr [[P]] acquire, align 4 ; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2]], 36 -; CHECK-NEXT: br i1 [[V3]], label [[BB3:%.*]], label [[BB2:%.*]] -; CHECK: bb2: +; CHECK-NEXT: br i1 [[V3]], label %[[BB3:.*]], label %[[BB2:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2() ; CHECK-NEXT: ret i32 [[RES_0]] -; CHECK: bb3: +; CHECK: [[BB3]]: ; CHECK-NEXT: ret i32 [[RES_0]] ; entry: @@ -228,23 +233,24 @@ bb3: define i32 @test6(ptr %P) { ; Negative test -; CHECK-LABEL: @test6( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test6( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) 
@f1() ; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0 -; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB:%.*]] -; CHECK: bb: -; CHECK-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; CHECK-NEXT: br label [[BB1]] -; CHECK: bb1: -; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, [[BB]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB:.*]] +; CHECK: [[BB]]: +; CHECK-NEXT: store i32 42, ptr [[P]], align 4 +; CHECK-NEXT: br label %[[BB1]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, %[[BB]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[V2:%.*]] = load atomic i32, ptr [[P]] acquire, align 4 ; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2]], 36 -; CHECK-NEXT: br i1 [[V3]], label [[BB3:%.*]], label [[BB2:%.*]] -; CHECK: bb2: +; CHECK-NEXT: br i1 [[V3]], label %[[BB3:.*]], label %[[BB2:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2() ; CHECK-NEXT: ret i32 [[RES_0]] -; CHECK: bb3: +; CHECK: [[BB3]]: ; CHECK-NEXT: ret i32 [[RES_0]] ; entry: @@ -273,23 +279,24 @@ bb3: define i32 @test7(ptr %P) { ; Negative test -; CHECK-LABEL: @test7( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test7( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[V0:%.*]] = tail call i32 (...) 
@f1() ; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0 -; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[BB:%.*]] -; CHECK: bb: -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4 -; CHECK-NEXT: br label [[BB1]] -; CHECK: bb1: -; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, [[BB]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 [[V1]], label %[[BB1:.*]], label %[[BB:.*]] +; CHECK: [[BB]]: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: br label %[[BB1]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[RES_0:%.*]] = phi i32 [ 1, %[[BB]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[V2:%.*]] = load atomic i32, ptr [[P]] acquire, align 4 ; CHECK-NEXT: [[V3:%.*]] = icmp sgt i32 [[V2]], 36 -; CHECK-NEXT: br i1 [[V3]], label [[BB3:%.*]], label [[BB2:%.*]] -; CHECK: bb2: +; CHECK-NEXT: br i1 [[V3]], label %[[BB3:.*]], label %[[BB2:.*]] +; CHECK: [[BB2]]: ; CHECK-NEXT: [[V4:%.*]] = tail call i32 (...) @f2() ; CHECK-NEXT: ret i32 [[RES_0]] -; CHECK: bb3: +; CHECK: [[BB3]]: ; CHECK-NEXT: ret i32 [[RES_0]] ; entry: @@ -319,10 +326,11 @@ bb3: ; We keep the tbaa and range metadata for the first load, as it dominates the ; second load. Hence we can eliminate the branch. define void @test8(ptr, ptr, ptr) { -; CHECK-LABEL: @test8( -; CHECK-NEXT: ret2: -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]], !range [[RNG4:![0-9]+]], !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]], !noundef [[META10:![0-9]+]] -; CHECK-NEXT: store i32 [[A]], ptr [[TMP1:%.*]], align 4 +; CHECK-LABEL: define void @test8( +; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) { +; CHECK-NEXT: [[RET2:.*:]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0]], !range [[RNG4:![0-9]+]], !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]], !noundef [[META10:![0-9]+]] +; CHECK-NEXT: store i32 [[A]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[XXX:%.*]] = tail call i32 (...) 
@f1() #[[ATTR0]] ; CHECK-NEXT: ret void ; @@ -344,24 +352,25 @@ ret2: ; we need to remove metadata from the existing load, and add appropriate ; metadata to the newly inserted load. define void @test9(ptr, ptr, ptr, i1 %c) { -; CHECK-LABEL: @test9( -; CHECK-NEXT: br i1 [[C:%.*]], label [[D1:%.*]], label [[D2:%.*]] -; CHECK: d1: -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4 -; CHECK-NEXT: br label [[D3:%.*]] -; CHECK: d2: +; CHECK-LABEL: define void @test9( +; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: br i1 [[C]], label %[[D1:.*]], label %[[D2:.*]] +; CHECK: [[D1]]: +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0]], align 4 +; CHECK-NEXT: br label %[[D3:.*]] +; CHECK: [[D2]]: ; CHECK-NEXT: [[XXXX:%.*]] = tail call i32 (...) @f1() #[[ATTR0]] -; CHECK-NEXT: [[B_PR:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[D3]] -; CHECK: d3: -; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_PR]], [[D2]] ], [ [[A]], [[D1]] ] -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, [[D2]] ], [ [[A]], [[D1]] ] -; CHECK-NEXT: store i32 [[P]], ptr [[TMP1:%.*]], align 4 +; CHECK-NEXT: [[B_PR:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[D3]] +; CHECK: [[D3]]: +; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_PR]], %[[D2]] ], [ [[A]], %[[D1]] ] +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, %[[D2]] ], [ [[A]], %[[D1]] ] +; CHECK-NEXT: store i32 [[P]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[B]], 8 -; CHECK-NEXT: br i1 [[C2]], label [[RET1:%.*]], label [[RET2:%.*]] -; CHECK: ret1: +; CHECK-NEXT: br i1 [[C2]], label %[[RET1:.*]], label %[[RET2:.*]] +; CHECK: [[RET1]]: ; CHECK-NEXT: ret void -; CHECK: ret2: +; CHECK: [[RET2]]: ; CHECK-NEXT: [[XXX:%.*]] = tail call i32 (...) 
@f1() #[[ATTR0]] ; CHECK-NEXT: ret void ; @@ -391,27 +400,28 @@ ret2: } define i32 @fn_noalias(i1 %c2,ptr noalias %P, ptr noalias %P2) { -; CHECK-LABEL: @fn_noalias( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C2:%.*]], label [[COND2:%.*]], label [[COND1:%.*]] -; CHECK: cond1: -; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 4 -; CHECK-NEXT: store i64 42, ptr [[P2:%.*]], align 4 +; CHECK-LABEL: define i32 @fn_noalias( +; CHECK-SAME: i1 [[C2:%.*]], ptr noalias [[P:%.*]], ptr noalias [[P2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C2]], label %[[COND2:.*]], label %[[COND1:.*]] +; CHECK: [[COND1]]: +; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4 +; CHECK-NEXT: store i64 42, ptr [[P2]], align 4 ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L1]], 0 -; CHECK-NEXT: br i1 [[C]], label [[COND2_THREAD:%.*]], label [[END:%.*]] -; CHECK: cond2.thread: +; CHECK-NEXT: br i1 [[C]], label %[[COND2_THREAD:.*]], label %[[END:.*]] +; CHECK: [[COND2_THREAD]]: ; CHECK-NEXT: call void @fn2(i64 [[L1]]) -; CHECK-NEXT: br label [[COND3:%.*]] -; CHECK: cond2: +; CHECK-NEXT: br label %[[COND3:.*]] +; CHECK: [[COND2]]: ; CHECK-NEXT: [[L2_PR:%.*]] = load i64, ptr [[P]], align 4 ; CHECK-NEXT: call void @fn2(i64 [[L2_PR]]) ; CHECK-NEXT: [[C3:%.*]] = icmp eq i64 [[L2_PR]], 0 -; CHECK-NEXT: br i1 [[C3]], label [[COND3]], label [[END]] -; CHECK: cond3: -; CHECK-NEXT: [[L23:%.*]] = phi i64 [ [[L1]], [[COND2_THREAD]] ], [ [[L2_PR]], [[COND2]] ] +; CHECK-NEXT: br i1 [[C3]], label %[[COND3]], label %[[END]] +; CHECK: [[COND3]]: +; CHECK-NEXT: [[L23:%.*]] = phi i64 [ [[L1]], %[[COND2_THREAD]] ], [ [[L2_PR]], %[[COND2]] ] ; CHECK-NEXT: call void @fn3(i64 [[L23]]) -; CHECK-NEXT: br label [[END]] -; CHECK: end: +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: ; CHECK-NEXT: ret i32 0 ; entry: @@ -447,43 +457,44 @@ end: @last = internal unnamed_addr global [65 x ptr] zeroinitializer, align 8 @next_status = internal unnamed_addr global [65 x %struct.NEXT_MOVE] 
zeroinitializer, align 8 define fastcc i32 @Search(i64 %idxprom.i, i64 %idxprom.i89, i32 %c) { -; CHECK-LABEL: @Search( -; CHECK-NEXT: cond.true282: -; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds [65 x i32], ptr @hash_move, i64 0, i64 [[IDXPROM_I:%.*]] +; CHECK-LABEL: define fastcc i32 @Search( +; CHECK-SAME: i64 [[IDXPROM_I:%.*]], i64 [[IDXPROM_I89:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[COND_TRUE282:.*:]] +; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds [65 x i32], ptr @hash_move, i64 0, i64 [[IDXPROM_I]] ; CHECK-NEXT: [[ARRAYIDX307:%.*]] = getelementptr inbounds [65 x i32], ptr @current_move, i64 0, i64 [[IDXPROM_I]] ; CHECK-NEXT: [[ARRAYIDX89:%.*]] = getelementptr inbounds [65 x ptr], ptr @last, i64 0, i64 [[IDXPROM_I]] ; CHECK-NEXT: [[PHASE:%.*]] = getelementptr inbounds [65 x %struct.NEXT_MOVE], ptr @next_status, i64 0, i64 [[IDXPROM_I]], i32 0 -; CHECK-NEXT: switch i32 [[C:%.*]], label [[CLEANUP:%.*]] [ -; CHECK-NEXT: i32 1, label [[SW_BB_I:%.*]] -; CHECK-NEXT: i32 0, label [[SW_BB21_I:%.*]] +; CHECK-NEXT: switch i32 [[C]], label %[[CLEANUP:.*]] [ +; CHECK-NEXT: i32 1, label %[[SW_BB_I:.*]] +; CHECK-NEXT: i32 0, label %[[SW_BB21_I:.*]] ; CHECK-NEXT: ] -; CHECK: sw.bb.i: +; CHECK: [[SW_BB_I]]: ; CHECK-NEXT: [[CALL_I62:%.*]] = call fastcc ptr @GenerateCheckEvasions() ; CHECK-NEXT: store ptr [[CALL_I62]], ptr [[ARRAYIDX89]], align 8 ; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[ARRAYIDX185]], align 4 ; CHECK-NEXT: [[TOBOOL_I63:%.*]] = icmp eq i32 [[L2]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_I63]], label [[SW_BB21_I_THREAD:%.*]], label [[IF_THEN_I64:%.*]] -; CHECK: sw.bb21.i.thread: +; CHECK-NEXT: br i1 [[TOBOOL_I63]], label %[[SW_BB21_I_THREAD:.*]], label %[[IF_THEN_I64:.*]] +; CHECK: [[SW_BB21_I_THREAD]]: ; CHECK-NEXT: store i32 10, ptr [[PHASE]], align 8 -; CHECK-NEXT: br label [[DO_BODY_PREHEADER_I67:%.*]] -; CHECK: if.then.i64: +; CHECK-NEXT: br label %[[DO_BODY_PREHEADER_I67:.*]] +; CHECK: [[IF_THEN_I64]]: ; CHECK-NEXT: store i32 7, ptr 
[[PHASE]], align 8 ; CHECK-NEXT: store i32 [[L2]], ptr [[ARRAYIDX307]], align 4 ; CHECK-NEXT: [[CALL16_I:%.*]] = call fastcc i32 @ValidMove(i32 [[L2]]) ; CHECK-NEXT: [[TOBOOL17_I:%.*]] = icmp eq i32 [[CALL16_I]], 0 -; CHECK-NEXT: br i1 [[TOBOOL17_I]], label [[IF_ELSE_I65:%.*]], label [[CLEANUP]] -; CHECK: if.else.i65: +; CHECK-NEXT: br i1 [[TOBOOL17_I]], label %[[IF_ELSE_I65:.*]], label %[[CLEANUP]] +; CHECK: [[IF_ELSE_I65]]: ; CHECK-NEXT: call void @f65() -; CHECK-NEXT: br label [[SW_BB21_I]] -; CHECK: sw.bb21.i: +; CHECK-NEXT: br label %[[SW_BB21_I]] +; CHECK: [[SW_BB21_I]]: ; CHECK-NEXT: [[L3_PR:%.*]] = load i32, ptr [[ARRAYIDX185]], align 4 ; CHECK-NEXT: store i32 10, ptr [[PHASE]], align 8 ; CHECK-NEXT: [[TOBOOL27_I:%.*]] = icmp eq i32 [[L3_PR]], 0 -; CHECK-NEXT: br i1 [[TOBOOL27_I]], label [[DO_BODY_PREHEADER_I67]], label [[CLEANUP]] -; CHECK: do.body.preheader.i67: +; CHECK-NEXT: br i1 [[TOBOOL27_I]], label %[[DO_BODY_PREHEADER_I67]], label %[[CLEANUP]] +; CHECK: [[DO_BODY_PREHEADER_I67]]: ; CHECK-NEXT: call void @f67() ; CHECK-NEXT: ret i32 67 -; CHECK: cleanup: +; CHECK: [[CLEANUP]]: ; CHECK-NEXT: call void @Cleanup() ; CHECK-NEXT: ret i32 0 ; @@ -543,22 +554,23 @@ declare void @Cleanup() declare void @f65() define i32 @fn_SinglePred(i1 %c2,ptr %P) { -; CHECK-LABEL: @fn_SinglePred( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 4 +; CHECK-LABEL: define i32 @fn_SinglePred( +; CHECK-SAME: i1 [[C2:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L1]], 0 -; CHECK-NEXT: br i1 [[C]], label [[COND3:%.*]], label [[COND1:%.*]] -; CHECK: cond1: -; CHECK-NEXT: br i1 [[C2:%.*]], label [[COND2:%.*]], label [[END:%.*]] -; CHECK: cond2: -; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], [[COND1]] ] +; CHECK-NEXT: br i1 [[C]], label %[[COND3:.*]], label %[[COND1:.*]] +; CHECK: [[COND1]]: +; CHECK-NEXT: br i1 [[C2]], label %[[COND2:.*]], label 
%[[END:.*]] +; CHECK: [[COND2]]: +; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], %[[COND1]] ] ; CHECK-NEXT: call void @fn2(i64 [[L2]]) -; CHECK-NEXT: br label [[END]] -; CHECK: cond3: +; CHECK-NEXT: br label %[[END]] +; CHECK: [[COND3]]: ; CHECK-NEXT: call void @fn2(i64 [[L1]]) ; CHECK-NEXT: call void @fn3(i64 [[L1]]) -; CHECK-NEXT: br label [[END]] -; CHECK: end: +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: ; CHECK-NEXT: ret i32 0 ; @@ -585,24 +597,25 @@ end: } define i32 @fn_SinglePredMultihop(i1 %c1, i1 %c2,ptr %P) { -; CHECK-LABEL: @fn_SinglePredMultihop( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 4 +; CHECK-LABEL: define i32 @fn_SinglePredMultihop( +; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4 ; CHECK-NEXT: [[C0:%.*]] = icmp eq i64 [[L1]], 0 -; CHECK-NEXT: br i1 [[C0]], label [[COND3:%.*]], label [[COND0:%.*]] -; CHECK: cond0: -; CHECK-NEXT: br i1 [[C1:%.*]], label [[COND1:%.*]], label [[END:%.*]] -; CHECK: cond1: -; CHECK-NEXT: br i1 [[C2:%.*]], label [[COND2:%.*]], label [[END]] -; CHECK: cond2: -; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], [[COND1]] ] +; CHECK-NEXT: br i1 [[C0]], label %[[COND3:.*]], label %[[COND0:.*]] +; CHECK: [[COND0]]: +; CHECK-NEXT: br i1 [[C1]], label %[[COND1:.*]], label %[[END:.*]] +; CHECK: [[COND1]]: +; CHECK-NEXT: br i1 [[C2]], label %[[COND2:.*]], label %[[END]] +; CHECK: [[COND2]]: +; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], %[[COND1]] ] ; CHECK-NEXT: call void @fn2(i64 [[L2]]) -; CHECK-NEXT: br label [[END]] -; CHECK: cond3: +; CHECK-NEXT: br label %[[END]] +; CHECK: [[COND3]]: ; CHECK-NEXT: call void @fn2(i64 [[L1]]) ; CHECK-NEXT: call void @fn3(i64 [[L1]]) -; CHECK-NEXT: br label [[END]] -; CHECK: end: +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: ; CHECK-NEXT: ret i32 0 ; @@ -640,19 +653,20 @@ declare void @fn3(i64) ; store. 
; define i32 @phi_translate_partial_redundant_loads(i32, ptr, ptr) { -; CHECK-LABEL: @phi_translate_partial_redundant_loads( -; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[TMP0:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP0]], label [[MERGE_THREAD:%.*]], label [[MERGE:%.*]] -; CHECK: merge.thread: -; CHECK-NEXT: store i32 1, ptr [[TMP1:%.*]], align 4 -; CHECK-NEXT: br label [[LEFT_X:%.*]] -; CHECK: merge: -; CHECK-NEXT: [[NEWLOAD_PR:%.*]] = load i32, ptr [[TMP2:%.*]], align 4 +; CHECK-LABEL: define i32 @phi_translate_partial_redundant_loads( +; CHECK-SAME: i32 [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) { +; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label %[[MERGE_THREAD:.*]], label %[[MERGE:.*]] +; CHECK: [[MERGE_THREAD]]: +; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4 +; CHECK-NEXT: br label %[[LEFT_X:.*]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[NEWLOAD_PR:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[NEWLOAD_PR]], 5 -; CHECK-NEXT: br i1 [[CMP1]], label [[LEFT_X]], label [[RIGHT_X:%.*]] -; CHECK: left_x: +; CHECK-NEXT: br i1 [[CMP1]], label %[[LEFT_X]], label %[[RIGHT_X:.*]] +; CHECK: [[LEFT_X]]: ; CHECK-NEXT: ret i32 20 -; CHECK: right_x: +; CHECK: [[RIGHT_X]]: ; CHECK-NEXT: ret i32 10 ; %cmp0 = icmp ne i32 %0, 0 @@ -693,7 +707,7 @@ right_x: !10 = !{!8} !11 = !{} ;. 
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} ; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll index 33027189dc5c0..0d32e508edf5f 100644 --- a/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll +++ b/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=licm -S | FileCheck %s ; PR9634 @@ -7,21 +7,21 @@ define void @f() { ; CHECK-LABEL: define void @f() { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[L_87_I:%.*]] = alloca [9 x i16], align 16 -; CHECK-NEXT: [[G_58_PROMOTED:%.*]] = load i32, ptr @g_58, align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[G_58_PROMOTED]], [[ENTRY:%.*]] ], [ [[OR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[INC12:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[G_58_PROMOTED:%.*]] = load i32, ptr @g_58, align 4, !tbaa [[INT_TBAA0:![0-9]+]] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[G_58_PROMOTED]], %[[ENTRY]] ], [ [[OR:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INC12:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[OR]] = or i32 [[TMP31]], 10 ; CHECK-NEXT: [[INC]] = add nsw i32 [[INC12]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; CHECK: for.end: -; 
CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ] -; CHECK-NEXT: store ptr @g_58, ptr @g_116, align 8, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: store i32 [[OR_LCSSA]], ptr @g_58, align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], %[[FOR_BODY]] ] +; CHECK-NEXT: store ptr @g_58, ptr @g_116, align 8, !tbaa [[ANYPTR_TBAA4:![0-9]+]] +; CHECK-NEXT: store i32 [[OR_LCSSA]], ptr @g_58, align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: ret void ; @@ -51,3 +51,11 @@ for.end: ; preds = %for.inc !4 = !{!6, !6, i64 0} !5 = !{!"any pointer", !1} !6 = !{!"int", !1} +;. +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[ANYPTR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"any pointer", [[META2]]} +;. 
diff --git a/llvm/test/Transforms/LICM/pr50367.ll b/llvm/test/Transforms/LICM/pr50367.ll index 7fd176b6c6bb6..6aafff74f61d8 100644 --- a/llvm/test/Transforms/LICM/pr50367.ll +++ b/llvm/test/Transforms/LICM/pr50367.ll @@ -1,23 +1,24 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s @e = external dso_local global ptr, align 8 define void @main(i1 %arg, ptr %arg1) { -; CHECK-LABEL: @main( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP1:%.*]] -; CHECK: loop1: -; CHECK-NEXT: br label [[LOOP2:%.*]] -; CHECK: loop2: -; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]] -; CHECK: loop2.latch: -; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4 -; CHECK-NEXT: br label [[LOOP2]] -; CHECK: loop.latch: -; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: br label [[LOOP1]] +; CHECK-LABEL: define void @main( +; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[LOOP1:.*]] +; CHECK: [[LOOP1]]: +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: br i1 [[ARG]], label %[[LOOP2_LATCH:.*]], label %[[LOOP_LATCH:.*]] +; CHECK: [[LOOP2_LATCH]]: +; CHECK-NEXT: store i32 0, ptr [[ARG1]], align 4 +; CHECK-NEXT: br label %[[LOOP2]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]] +; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] +; CHECK-NEXT: br label %[[LOOP1]] ; entry: br label %loop1 @@ -40,19 +41,20 @@ loop.latch: } define void @store_null(i1 %arg) { -; 
CHECK-LABEL: @store_null( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP1:%.*]] -; CHECK: loop1: -; CHECK-NEXT: br label [[LOOP2:%.*]] -; CHECK: loop2: -; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]] -; CHECK: loop2.latch: +; CHECK-LABEL: define void @store_null( +; CHECK-SAME: i1 [[ARG:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[LOOP1:.*]] +; CHECK: [[LOOP1]]: +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: br i1 [[ARG]], label %[[LOOP2_LATCH:.*]], label %[[LOOP_LATCH:.*]] +; CHECK: [[LOOP2_LATCH]]: ; CHECK-NEXT: store i32 0, ptr null, align 4 -; CHECK-NEXT: br label [[LOOP2]] -; CHECK: loop.latch: -; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: br label [[LOOP1]] +; CHECK-NEXT: br label %[[LOOP2]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[INT_TBAA4]] +; CHECK-NEXT: br label %[[LOOP1]] ; entry: br label %loop1 @@ -80,3 +82,11 @@ loop.latch: !3 = !{!"Simple C/C++ TBAA"} !4 = !{!5, !5, i64 0} !5 = !{!"int", !2, i64 0} +;. +; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"int", [[META2]], i64 0} +;. 
diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll index bd3960e846b42..3af65df55a099 100644 --- a/llvm/test/Transforms/LICM/scalar-promote.ll +++ b/llvm/test/Transforms/LICM/scalar-promote.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 6 ; RUN: opt < %s -passes=licm -S | FileCheck %s ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='require,require,require,require,loop-mssa(licm)' -S %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @@ -6,19 +6,20 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 @X = global i32 7 ; [#uses=4] define void @test1(i32 %i) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: Entry: +; CHECK-LABEL: define void @test1( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr @X, align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: Loop: -; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] -; CHECK: Out: -; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]] +; CHECK: [[OUT]]: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = 
phi i32 [ [[X2]], %[[LOOP]] ] ; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr @X, align 4 ; CHECK-NEXT: ret void ; @@ -39,18 +40,19 @@ Out: } define void @test2(i32 %i) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: Entry: +; CHECK-LABEL: define void @test2( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[X1:%.*]] = getelementptr i32, ptr @X, i64 1 ; CHECK-NEXT: [[X2:%.*]] = getelementptr i32, ptr @X, i64 1 ; CHECK-NEXT: [[X1_PROMOTED:%.*]] = load i32, ptr [[X1]], align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: Loop: -; CHECK-NEXT: [[A1:%.*]] = phi i32 [ [[V:%.*]], [[LOOP]] ], [ [[X1_PROMOTED]], [[ENTRY:%.*]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[A1:%.*]] = phi i32 [ [[V:%.*]], %[[LOOP]] ], [ [[X1_PROMOTED]], %[[ENTRY]] ] ; CHECK-NEXT: [[V]] = add i32 [[A1]], 1 -; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] -; CHECK: Exit: -; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i32 [ [[V]], [[LOOP]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i32 [ [[V]], %[[LOOP]] ] ; CHECK-NEXT: store i32 [[V_LCSSA]], ptr [[X1]], align 4 ; CHECK-NEXT: ret void ; @@ -70,14 +72,15 @@ Exit: ; preds = %Loop } define void @test3(i32 %i) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: Loop: +; CHECK-LABEL: define void @test3( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: ; CHECK-NEXT: [[X:%.*]] = load volatile i32, ptr @X, align 4 ; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 1 ; CHECK-NEXT: store i32 [[X2]], ptr @X, align 4 -; CHECK-NEXT: br i1 true, label [[OUT:%.*]], label [[LOOP]] -; CHECK: Out: +; CHECK-NEXT: br i1 true, label %[[OUT:.*]], label %[[LOOP]] +; CHECK: [[OUT]]: ; CHECK-NEXT: ret void ; br label %Loop @@ -94,14 +97,15 @@ Out: ; preds = %Loop ; Should not promote this to a register define void @test3b(i32 %i) { -; CHECK-LABEL: @test3b( -; CHECK-NEXT: br label [[LOOP:%.*]] 
-; CHECK: Loop: +; CHECK-LABEL: define void @test3b( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr @X, align 4 ; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 1 ; CHECK-NEXT: store volatile i32 [[X2]], ptr @X, align 4 -; CHECK-NEXT: br i1 true, label [[OUT:%.*]], label [[LOOP]] -; CHECK: Out: +; CHECK-NEXT: br i1 true, label %[[OUT:.*]], label %[[LOOP]] +; CHECK: [[OUT]]: ; CHECK-NEXT: ret void ; br label %Loop @@ -119,30 +123,31 @@ Out: ; preds = %Loop ; Should have promoted 'handle2' accesses. ; Should not have promoted offsetx1 loads. define void @test4(ptr %x, i8 %n) { -; CHECK-LABEL: @test4( +; CHECK-LABEL: define void @test4( +; CHECK-SAME: ptr [[X:%.*]], i8 [[N:%.*]]) { ; CHECK-NEXT: [[HANDLE1:%.*]] = alloca ptr, align 8 ; CHECK-NEXT: [[HANDLE2:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: store ptr [[X:%.*]], ptr [[HANDLE1]], align 8 +; CHECK-NEXT: store ptr [[X]], ptr [[HANDLE1]], align 8 ; CHECK-NEXT: [[TMP:%.*]] = getelementptr i8, ptr [[X]], i64 8 ; CHECK-NEXT: [[OFFSETX1:%.*]] = load ptr, ptr [[HANDLE1]], align 8 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: br label [[SUBLOOP:%.*]] -; CHECK: subloop: -; CHECK-NEXT: [[NEWOFFSETX21:%.*]] = phi ptr [ [[TMP]], [[LOOP]] ], [ [[NEWOFFSETX2:%.*]], [[SUBLOOP]] ] -; CHECK-NEXT: [[COUNT:%.*]] = phi i8 [ 0, [[LOOP]] ], [ [[NEXTCOUNT:%.*]], [[SUBLOOP]] ] -; CHECK-NEXT: store i8 [[N:%.*]], ptr [[NEWOFFSETX21]], align 1 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: br label %[[SUBLOOP:.*]] +; CHECK: [[SUBLOOP]]: +; CHECK-NEXT: [[NEWOFFSETX21:%.*]] = phi ptr [ [[TMP]], %[[LOOP]] ], [ [[NEWOFFSETX2:%.*]], %[[SUBLOOP]] ] +; CHECK-NEXT: [[COUNT:%.*]] = phi i8 [ 0, %[[LOOP]] ], [ [[NEXTCOUNT:%.*]], %[[SUBLOOP]] ] +; CHECK-NEXT: store i8 [[N]], ptr [[NEWOFFSETX21]], align 1 ; CHECK-NEXT: [[NEWOFFSETX2]] = getelementptr i8, ptr [[NEWOFFSETX21]], i64 -1 ; CHECK-NEXT: [[NEXTCOUNT]] = add i8 [[COUNT]], 1 
; CHECK-NEXT: [[INNEREXITCOND:%.*]] = icmp sge i8 [[NEXTCOUNT]], 8 -; CHECK-NEXT: br i1 [[INNEREXITCOND]], label [[INNEREXIT:%.*]], label [[SUBLOOP]] -; CHECK: innerexit: -; CHECK-NEXT: [[NEWOFFSETX2_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2]], [[SUBLOOP]] ] +; CHECK-NEXT: br i1 [[INNEREXITCOND]], label %[[INNEREXIT:.*]], label %[[SUBLOOP]] +; CHECK: [[INNEREXIT]]: +; CHECK-NEXT: [[NEWOFFSETX2_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2]], %[[SUBLOOP]] ] ; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[OFFSETX1]], align 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: [[NEWOFFSETX2_LCSSA_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2_LCSSA]], [[INNEREXIT]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[NEWOFFSETX2_LCSSA_LCSSA:%.*]] = phi ptr [ [[NEWOFFSETX2_LCSSA]], %[[INNEREXIT]] ] ; CHECK-NEXT: store ptr [[NEWOFFSETX2_LCSSA_LCSSA]], ptr [[HANDLE2]], align 8 ; CHECK-NEXT: ret void ; @@ -177,20 +182,21 @@ exit: } define void @test5(i32 %i, ptr noalias %P2) { -; CHECK-LABEL: @test5( -; CHECK-NEXT: Entry: +; CHECK-LABEL: define void @test5( +; CHECK-SAME: i32 [[I:%.*]], ptr noalias [[P2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr @X, align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: Loop: -; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 -; CHECK-NEXT: store atomic ptr @X, ptr [[P2:%.*]] monotonic, align 8 +; CHECK-NEXT: store atomic ptr @X, ptr [[P2]] monotonic, align 8 ; CHECK-NEXT: 
[[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] -; CHECK: Out: -; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]] +; CHECK: [[OUT]]: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ] ; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr @X, align 4 ; CHECK-NEXT: ret void ; @@ -217,28 +223,29 @@ Out: ; PR14753 - Preserve TBAA tags when promoting values in a loop. define void @test6(i32 %n, ptr nocapture %a, ptr %gi) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 0, ptr [[GI:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[GI_PROMOTED:%.*]] = load i32, ptr [[GI]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[GI_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[STOREMERGE2:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC]], [[FOR_BODY]] ] +; CHECK-LABEL: define void @test6( +; CHECK-SAME: i32 [[N:%.*]], ptr captures(none) [[A:%.*]], ptr [[GI:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store i32 0, ptr [[GI]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[N]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[GI_PROMOTED:%.*]] = load i32, ptr [[GI]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[GI_PROMOTED]], %[[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[STOREMERGE2:%.*]] = phi i32 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INC]], %[[FOR_BODY]] ] ; 
CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE2]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !tbaa [[FLOAT_TBAA4:![0-9]+]] ; CHECK-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] -; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] -; CHECK-NEXT: store i32 [[INC_LCSSA]], ptr [[GI]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_FOR_END_CRIT_EDGE:.*]] +; CHECK: [[FOR_COND_FOR_END_CRIT_EDGE]]: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], %[[FOR_BODY]] ] +; CHECK-NEXT: store i32 [[INC_LCSSA]], ptr [[GI]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -273,21 +280,21 @@ declare void @capture(ptr) ; We can promote even if opaque may throw. 
define i32 @test7() { -; CHECK-LABEL: @test7( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test7() { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @capture(ptr [[LOCAL]]) ; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[X2]] = call i32 @opaque(i32 [[X21]]) ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ] ; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr [[LOCAL]], align 4 ; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4 ; CHECK-NEXT: ret i32 [[RET]] @@ -314,27 +321,27 @@ exit: ; Hoist the load even if we cannot sink the store, since the store is really ; control-flow dependent. 
define i32 @test7bad() { -; CHECK-LABEL: @test7bad( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test7bad() { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @capture(ptr [[LOCAL]]) ; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X21:%.*]], %[[ELSE:.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[ELSE]] ] ; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] -; CHECK: if: +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE]] +; CHECK: [[IF]]: ; CHECK-NEXT: store i32 [[X2]], ptr [[LOCAL]], align 4 -; CHECK-NEXT: br label [[ELSE]] -; CHECK: else: -; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ] +; CHECK-NEXT: br label %[[ELSE]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], %[[IF]] ], [ [[X22]], %[[LOOP]] ] ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4 ; CHECK-NEXT: ret i32 [[RET]] ; @@ -367,22 +374,22 @@ exit: ; opaque() may throw, we can still promote - the load not being guaranteed ; doesn't block us, because %local is always dereferenceable. 
define i32 @test8() { -; CHECK-LABEL: @test8( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test8() { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @capture(ptr [[LOCAL]]) ; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[THROWAWAY:%.*]] = call i32 @opaque(i32 [[J]]) ; CHECK-NEXT: [[X2]] = call i32 @opaque(i32 [[X21]]) ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ] ; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr [[LOCAL]], align 4 ; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4 ; CHECK-NEXT: ret i32 [[RET]] @@ -412,27 +419,27 @@ exit: ; control flow, we can only promote if the pointer is otherwise known to be ; dereferenceable define i32 @test9() { -; CHECK-LABEL: @test9( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test9() { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @capture(ptr [[LOCAL]]) ; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ 
[[X2:%.*]], [[ELSE:%.*]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[ELSE:.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[ELSE]] ] ; CHECK-NEXT: [[J2:%.*]] = call i32 @opaque(i32 [[J]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[J2]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] -; CHECK: if: -; CHECK-NEXT: br label [[ELSE]] -; CHECK: else: -; CHECK-NEXT: [[X2]] = phi i32 [ 0, [[LOOP]] ], [ [[X21]], [[IF]] ] +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE]] +; CHECK: [[IF]]: +; CHECK-NEXT: br label %[[ELSE]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[X2]] = phi i32 [ 0, %[[LOOP]] ], [ [[X21]], %[[IF]] ] ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[ELSE]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[ELSE]] ] ; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr [[LOCAL]], align 4 ; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4 ; CHECK-NEXT: ret i32 [[RET]] @@ -465,27 +472,28 @@ exit: } define i32 @test9bad(i32 %i) { -; CHECK-LABEL: @test9bad( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test9bad( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @capture(ptr [[LOCAL]]) -; CHECK-NEXT: [[NOTDEREF:%.*]] = getelementptr i32, ptr [[LOCAL]], i32 [[I:%.*]] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[NOTDEREF:%.*]] = getelementptr i32, ptr [[LOCAL]], i32 
[[I]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[ELSE:.*]] ] ; CHECK-NEXT: [[J2:%.*]] = call i32 @opaque(i32 [[J]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[J2]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] -; CHECK: if: +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE]] +; CHECK: [[IF]]: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[NOTDEREF]], align 4 -; CHECK-NEXT: br label [[ELSE]] -; CHECK: else: -; CHECK-NEXT: [[X2:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[X]], [[IF]] ] +; CHECK-NEXT: br label %[[ELSE]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[X2:%.*]] = phi i32 [ 0, %[[LOOP]] ], [ [[X]], %[[IF]] ] ; CHECK-NEXT: store i32 [[X2]], ptr [[NOTDEREF]], align 4 ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[NOTDEREF]], align 4 ; CHECK-NEXT: ret i32 [[RET]] ; @@ -518,19 +526,20 @@ exit: } define void @test10(i32 %i) { -; CHECK-LABEL: @test10( -; CHECK-NEXT: Entry: +; CHECK-LABEL: define void @test10( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[X_PROMOTED:%.*]] = load atomic i32, ptr @X unordered, align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: Loop: -; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = 
icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] -; CHECK: Out: -; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]] +; CHECK: [[OUT]]: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[LOOP]] ] ; CHECK-NEXT: store atomic i32 [[X2_LCSSA]], ptr @X unordered, align 4 ; CHECK-NEXT: ret void ; @@ -555,26 +564,27 @@ Out: ; Early exit is known not to be taken on first iteration and thus doesn't ; effect whether load is known to execute. define void @test11(i32 %i) { -; CHECK-LABEL: @test11( -; CHECK-NEXT: Entry: +; CHECK-LABEL: define void @test11( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr @X, align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: Loop: -; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BODY:%.*]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[BODY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], %[[ENTRY]] ], [ [[X2:%.*]], %[[BODY:.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[BODY]] ] ; CHECK-NEXT: [[EARLY_TEST:%.*]] = icmp ult i32 [[J]], 32 -; CHECK-NEXT: br i1 [[EARLY_TEST]], label [[BODY]], label [[EARLY:%.*]] -; CHECK: body: +; CHECK-NEXT: br i1 [[EARLY_TEST]], label %[[BODY]], label %[[EARLY:.*]] +; CHECK: [[BODY]]: ; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] -; CHECK: Early: -; CHECK-NEXT: [[X21_LCSSA:%.*]] = phi i32 [ [[X21]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[COND]], label %[[OUT:.*]], label %[[LOOP]] +; CHECK: [[EARLY]]: +; CHECK-NEXT: [[X21_LCSSA:%.*]] = phi i32 [ [[X21]], %[[LOOP]] ] ; CHECK-NEXT: store i32 [[X21_LCSSA]], ptr @X, align 4 ; 
CHECK-NEXT: ret void -; CHECK: Out: -; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[BODY]] ] +; CHECK: [[OUT]]: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], %[[BODY]] ] ; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr @X, align 4 ; CHECK-NEXT: ret void ; @@ -603,21 +613,22 @@ Out: define i8 @test_hoistable_existing_load_sinkable_store_writeonly(ptr dereferenceable(8) %ptr, i8 %start) writeonly { ; CHECK: Function Attrs: memory(write) -; CHECK-LABEL: @test_hoistable_existing_load_sinkable_store_writeonly( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i8, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[INC1:%.*]] = phi i8 [ [[PTR_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ] +; CHECK-LABEL: define i8 @test_hoistable_existing_load_sinkable_store_writeonly( +; CHECK-SAME: ptr dereferenceable(8) [[PTR:%.*]], i8 [[START:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i8, ptr [[PTR]], align 1 +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[INC1:%.*]] = phi i8 [ [[PTR_PROMOTED]], %[[ENTRY]] ], [ [[INC1]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i8 [ [[START]], %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] -; CHECK: loop.latch: +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: store i8 [[INC1]], ptr [[PTR]], align 1 ; CHECK-NEXT: [[ADD]] = add i8 [[I]], [[INC1]] -; CHECK-NEXT: br label [[LOOP_HEADER]] -; CHECK: exit: -; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i8 [ [[I]], [[LOOP_HEADER]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i8 [ [[I]], 
%[[LOOP_HEADER]] ] ; CHECK-NEXT: ret i8 [[I_LCSSA]] ; entry: @@ -644,20 +655,21 @@ exit: ; Test case for PR51248. define void @test_sink_store_only() writeonly { ; CHECK: Function Attrs: memory(write) -; CHECK-LABEL: @test_sink_store_only( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ] +; CHECK-LABEL: define void @test_sink_store_only( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] -; CHECK: loop.latch: +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3 ; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4 -; CHECK-NEXT: br label [[LOOP_HEADER]] -; CHECK: exit: -; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ] ; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr @glb, align 1 ; CHECK-NEXT: ret void ; @@ -681,21 +693,22 @@ exit: define void @test_sink_store_to_local_object_only_loop_must_execute() writeonly { ; CHECK: Function Attrs: memory(write) -; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_must_execute( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_sink_store_to_local_object_only_loop_must_execute( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 -; CHECK-NEXT: br 
label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] -; CHECK: loop.latch: +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3 ; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4 -; CHECK-NEXT: br label [[LOOP_HEADER]] -; CHECK: exit: -; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ] ; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr [[A]], align 1 ; CHECK-NEXT: ret void ; @@ -722,22 +735,23 @@ exit: ; pre-header. Make sure the writeonly attribute is dropped. 
define void @test_sink_store_to_local_object_only_loop_may_not_execute(i8 %n) writeonly { ; CHECK: Function Attrs: memory(write) -; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_may_not_execute( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_sink_store_to_local_object_only_loop_may_not_execute( +; CHECK-SAME: i8 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, ptr [[A]], align 1 -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] -; CHECK: loop.latch: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3 ; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4 -; CHECK-NEXT: br label [[LOOP_HEADER]] -; CHECK: exit: -; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ] ; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr [[A]], align 1 ; CHECK-NEXT: ret void ; @@ -764,22 +778,23 @@ declare dereferenceable(8) noalias ptr @alloc_writeonly() writeonly define void @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1(i8 %n) writeonly { ; CHECK: Function Attrs: memory(write) -; CHECK-LABEL: 
@test_sink_store_to_noalias_call_object_only_loop_may_not_execute1( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1( +; CHECK-SAME: i8 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[A:%.*]] = call noalias dereferenceable(8) ptr @alloc_writeonly() ; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, ptr [[A]], align 1 -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] -; CHECK: loop.latch: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], %[[ENTRY]] ], [ [[DIV:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[DIV]] = sdiv i8 [[I]], 3 ; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4 -; CHECK-NEXT: br label [[LOOP_HEADER]] -; CHECK: exit: -; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], [[LOOP_HEADER]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DIV1_LCSSA:%.*]] = phi i8 [ [[DIV1]], %[[LOOP_HEADER]] ] ; CHECK-NEXT: store i8 [[DIV1_LCSSA]], ptr [[A]], align 1 ; CHECK-NEXT: ret void ; @@ -804,17 +819,18 @@ exit: define void @test_sink_store_only_no_phi_needed() writeonly { ; CHECK: Function Attrs: memory(write) -; CHECK-LABEL: @test_sink_store_only_no_phi_needed( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ] +; 
CHECK-LABEL: define void @test_sink_store_only_no_phi_needed( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4 ; CHECK-NEXT: [[DIV:%.*]] = sdiv i8 [[I]], 3 ; CHECK-NEXT: [[ADD]] = add i8 [[I]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: -; CHECK-NEXT: [[DIV_LCSSA:%.*]] = phi i8 [ [[DIV]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DIV_LCSSA:%.*]] = phi i8 [ [[DIV]], %[[LOOP]] ] ; CHECK-NEXT: store i8 [[DIV_LCSSA]], ptr @glb, align 1 ; CHECK-NEXT: ret void ; @@ -834,28 +850,29 @@ exit: } define void @sink_store_lcssa_phis(ptr %ptr, i1 %c) { -; CHECK-LABEL: @sink_store_lcssa_phis( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP_1_HEADER:%.*]] -; CHECK: loop.1.header: -; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]] -; CHECK: loop.2.header: -; CHECK-NEXT: br i1 false, label [[LOOP_3_HEADER_PREHEADER:%.*]], label [[LOOP_1_LATCH:%.*]] -; CHECK: loop.3.header.preheader: -; CHECK-NEXT: br label [[LOOP_3_HEADER:%.*]] -; CHECK: loop.3.header: -; CHECK-NEXT: [[I_11:%.*]] = phi i32 [ [[I_1:%.*]], [[LOOP_3_LATCH:%.*]] ], [ poison, [[LOOP_3_HEADER_PREHEADER]] ] -; CHECK-NEXT: [[I_1]] = phi i32 [ 1, [[LOOP_3_LATCH]] ], [ 0, [[LOOP_3_HEADER_PREHEADER]] ] -; CHECK-NEXT: br i1 true, label [[LOOP_3_LATCH]], label [[LOOP_2_LATCH:%.*]] -; CHECK: loop.3.latch: -; CHECK-NEXT: br label [[LOOP_3_HEADER]] -; CHECK: loop.2.latch: -; CHECK-NEXT: [[I_11_LCSSA:%.*]] = phi i32 [ [[I_11]], [[LOOP_3_HEADER]] ] -; CHECK-NEXT: store i32 [[I_11_LCSSA]], ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: br label [[LOOP_2_HEADER]] -; CHECK: loop.1.latch: -; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP_1_HEADER]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-LABEL: define void @sink_store_lcssa_phis( +; 
CHECK-SAME: ptr [[PTR:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[LOOP_1_HEADER:.*]] +; CHECK: [[LOOP_1_HEADER]]: +; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]] +; CHECK: [[LOOP_2_HEADER]]: +; CHECK-NEXT: br i1 false, label %[[LOOP_3_HEADER_PREHEADER:.*]], label %[[LOOP_1_LATCH:.*]] +; CHECK: [[LOOP_3_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_3_HEADER:.*]] +; CHECK: [[LOOP_3_HEADER]]: +; CHECK-NEXT: [[I_11:%.*]] = phi i32 [ [[I_1:%.*]], %[[LOOP_3_LATCH:.*]] ], [ poison, %[[LOOP_3_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[I_1]] = phi i32 [ 1, %[[LOOP_3_LATCH]] ], [ 0, %[[LOOP_3_HEADER_PREHEADER]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_3_LATCH]], label %[[LOOP_2_LATCH:.*]] +; CHECK: [[LOOP_3_LATCH]]: +; CHECK-NEXT: br label %[[LOOP_3_HEADER]] +; CHECK: [[LOOP_2_LATCH]]: +; CHECK-NEXT: [[I_11_LCSSA:%.*]] = phi i32 [ [[I_11]], %[[LOOP_3_HEADER]] ] +; CHECK-NEXT: store i32 [[I_11_LCSSA]], ptr [[PTR]], align 4 +; CHECK-NEXT: br label %[[LOOP_2_HEADER]] +; CHECK: [[LOOP_1_LATCH]]: +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_1_HEADER]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -886,18 +903,19 @@ exit: } define void @cond_store_writable_dereferenceable(ptr noalias writable dereferenceable(4) %ptr) { -; CHECK-LABEL: @cond_store_writable_dereferenceable( -; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], [[LOOP_LATCH:%.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ] +; CHECK-LABEL: define void @cond_store_writable_dereferenceable( +; CHECK-SAME: ptr noalias writable dereferenceable(4) [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 
[[V_INC1]], 10 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[EXIT:%.*]] -; CHECK: loop.latch: +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[V_INC]] = add i32 [[V_INC1]], 1 -; CHECK-NEXT: br label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], %[[LOOP]] ] ; CHECK-NEXT: store i32 [[V_INC1_LCSSA]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -918,18 +936,19 @@ exit: } define void @cond_store_writable_not_sufficiently_dereferenceable(ptr noalias writable dereferenceable(2) %ptr) { -; CHECK-LABEL: @cond_store_writable_not_sufficiently_dereferenceable( -; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], [[LOOP_LATCH:%.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ] +; CHECK-LABEL: define void @cond_store_writable_not_sufficiently_dereferenceable( +; CHECK-SAME: ptr noalias writable dereferenceable(2) [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[V_INC1:%.*]] = phi i32 [ [[V_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[PTR_PROMOTED]], [[TMP0:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[V_INC1]], 10 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[EXIT:%.*]] -; CHECK: loop.latch: +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] +; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[V_INC]] = add i32 [[V_INC1]], 1 ; CHECK-NEXT: store i32 [[V_INC]], ptr [[PTR]], align 4 -; CHECK-NEXT: br label [[LOOP]] -; CHECK: exit: +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; br label %loop @@ -954,3 +973,11 @@ exit: !3 = !{!5, !5, i64 0} !4 = !{!"int", !1} !5 = 
!{!"float", !1} +;. +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[FLOAT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"float", [[META2]]} +;. diff --git a/llvm/test/Transforms/LICM/variant-aainfo.ll b/llvm/test/Transforms/LICM/variant-aainfo.ll index 1e2a33ec990c5..4eac3f2770f67 100644 --- a/llvm/test/Transforms/LICM/variant-aainfo.ll +++ b/llvm/test/Transforms/LICM/variant-aainfo.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=licm | FileCheck %s ; See https://discourse.llvm.org/t/rfc-dont-merge-memory-locations-in-aliassettracker/73336 @@ -8,21 +8,21 @@ define void @_Z4testP1S(ptr %s) { ; CHECK-LABEL: define void @_Z4testP1S( ; CHECK-SAME: ptr [[S:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[S_PROMOTED:%.*]] = load ptr, ptr [[S]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[ADD_PTR_I_LCSSA:%.*]] = phi ptr [ [[ADD_PTR_I:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: store ptr [[ADD_PTR_I_LCSSA]], ptr [[S]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[S_PROMOTED:%.*]] = load ptr, ptr [[S]], align 4, !tbaa [[ANYPTR_TBAA0:![0-9]+]] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: [[ADD_PTR_I_LCSSA:%.*]] = phi ptr [ [[ADD_PTR_I:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: store ptr [[ADD_PTR_I_LCSSA]], ptr [[S]], align 4, !tbaa [[ANYPTR_TBAA0]] ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[ADD_PTR_I1:%.*]] = phi ptr [ [[S_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD_PTR_I]], [[FOR_BODY]] ] -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, 
[[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: store i32 [[I_05]], ptr [[ADD_PTR_I1]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[ADD_PTR_I1:%.*]] = phi ptr [ [[S_PROMOTED]], %[[ENTRY]] ], [ [[ADD_PTR_I]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: store i32 [[I_05]], ptr [[ADD_PTR_I1]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] ; CHECK-NEXT: [[ADD_PTR_I]] = getelementptr inbounds i32, ptr [[ADD_PTR_I1]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 100 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; entry: br label %for.body @@ -50,10 +50,10 @@ for.body: ; preds = %entry, %for.body !6 = !{!"int", !3, i64 0} !7 = !{!2, !2, i64 0} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} ; CHECK: [[META5]] = !{!"int", [[META2]], i64 0} ;. 
diff --git a/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll b/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll index 881931e0ccc2c..218b7f4487cb5 100644 --- a/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll +++ b/llvm/test/Transforms/LoopIdiom/memmove-tbaa.ll @@ -1,21 +1,22 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes="loop-idiom" < %s -S | FileCheck %s define void @looper(ptr nocapture %out) { -; CHECK-LABEL: @looper( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT:%.*]], i32 16 -; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: br label [[FOR_BODY4:%.*]] -; CHECK: for.body4: -; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY4]] ] +; CHECK-LABEL: define void @looper( +; CHECK-SAME: ptr captures(none) [[OUT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT]], i32 16 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[DOUBLE_TBAA0:![0-9]+]] +; CHECK-NEXT: br label %[[FOR_BODY4:.*]] +; CHECK: [[FOR_BODY4]]: +; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY4]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[M]], i64 [[J_020]] -; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[J_020]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[J_020]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[J_020]], 31 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY4]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: 
for.cond.cleanup: +; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY4]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; entry: @@ -38,20 +39,21 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3 define void @looperBadMerge(ptr nocapture %out) { -; CHECK-LABEL: @looperBadMerge( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT:%.*]], i32 16 -; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: br label [[FOR_BODY4:%.*]] -; CHECK: for.body4: -; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY4]] ] +; CHECK-LABEL: define void @looperBadMerge( +; CHECK-SAME: ptr captures(none) [[OUT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT]], i32 16 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false), !tbaa [[CHAR_TBAA4:![0-9]+]] +; CHECK-NEXT: br label %[[FOR_BODY4:.*]] +; CHECK: [[FOR_BODY4]]: +; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY4]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[M]], i64 [[J_020]] -; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[J_020]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[J_020]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[J_020]], 31 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY4]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY4]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; entry: @@ -73,20 +75,21 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3 } define void 
@looperGoodMerge(ptr nocapture %out) { -; CHECK-LABEL: @looperGoodMerge( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT:%.*]], i32 16 +; CHECK-LABEL: define void @looperGoodMerge( +; CHECK-SAME: ptr captures(none) [[OUT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[M:%.*]] = getelementptr double, ptr [[OUT]], i32 16 ; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[OUT]], ptr align 8 [[M]], i64 256, i1 false) -; CHECK-NEXT: br label [[FOR_BODY4:%.*]] -; CHECK: for.body4: -; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY4]] ] +; CHECK-NEXT: br label %[[FOR_BODY4:.*]] +; CHECK: [[FOR_BODY4]]: +; CHECK-NEXT: [[J_020:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY4]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[M]], i64 [[J_020]] -; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 [[J_020]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[J_020]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[J_020]], 31 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY4]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY4]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; entry: @@ -114,3 +117,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3 !6 = !{!"double", !7, i64 0} !7 = !{!"omnipotent char", !8, i64 0} !8 = !{!"Simple C++ TBAA"} +;. +; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +; CHECK: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0} +;. 
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll index 89ce66767ccc9..6f48c41a2ad06 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='loop-unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s @@ -6,137 +6,138 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; Tests for(i) { sum = 0; for(j) sum += B[j]; A[i] = sum; } define void @test1(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 { -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0 -; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I:%.*]], 0 +; CHECK-LABEL: define void @test1( +; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0 +; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.outer.preheader: +; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_OUTER_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_OUTER_PREHEADER]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1 ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], 
label [[FOR_OUTER_PREHEADER_NEW:%.*]] -; CHECK: for.outer.preheader.new: +; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]] +; CHECK: [[FOR_OUTER_PREHEADER_NEW]]: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]] -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ] ; CHECK-NEXT: [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[ADD8_1:%.*]] = add nuw nsw i32 [[I]], 2 ; CHECK-NEXT: [[ADD8_2:%.*]] = add nuw nsw i32 [[I]], 3 ; CHECK-NEXT: [[ADD8_3]] = add nuw i32 [[I]], 4 ; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4 -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, 
ptr [[B:%.*]], i32 [[J]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[ADD]] = add i32 [[TMP2]], [[SUM]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_1]] = add i32 [[TMP3]], [[SUM_1]] ; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_2]] = add i32 [[TMP4]], [[SUM_2]] ; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, 
ptr [[B]], i32 [[J_3]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_3]] = add i32 [[TMP5]], [[SUM_3]] ; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1 ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]] -; CHECK: for.latch: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]] -; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_1]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr 
[[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_2]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]] -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit.unr-lcssa.loopexit: -; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.end.loopexit.unr-lcssa: -; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.outer.epil.preheader: -; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]] -; CHECK: for.outer.epil: -; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]] -; CHECK: for.inner.epil: -; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: br i1 [[LCMP_MOD]], label 
%[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]] +; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] +; CHECK: [[FOR_OUTER_EPIL]]: +; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] +; CHECK: [[FOR_INNER_EPIL]]: +; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[TMP6]], [[SUM_EPIL]] ; CHECK-NEXT: [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]] -; CHECK: for.latch.epil: -; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]] +; CHECK: [[FOR_LATCH_EPIL]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1 ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]] -; CHECK: for.outer.epil.1: -; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]] -; CHECK: for.inner.epil.1: -; 
CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]] +; CHECK: [[FOR_OUTER_EPIL_1]]: +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]] +; CHECK: [[FOR_INNER_EPIL_1]]: +; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] ; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_1]] = add i32 [[TMP7]], [[SUM_EPIL_1]] ; CHECK-NEXT: [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]] -; CHECK: for.latch.epil.1: -; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]] +; CHECK: [[FOR_LATCH_EPIL_1]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD8_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2 ; CHECK-NEXT: 
[[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.outer.epil.2: -; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]] -; CHECK: for.inner.epil.2: -; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_OUTER_EPIL_2]]: +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]] +; CHECK: [[FOR_INNER_EPIL_2]]: +; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] ; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_2]] = add i32 [[TMP8]], [[SUM_EPIL_2]] ; CHECK-NEXT: [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]] -; CHECK: for.latch.epil.2: -; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]] +; CHECK: [[FOR_LATCH_EPIL_2]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], %[[FOR_INNER_EPIL_2]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL_1]] -; 
CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.end.loopexit.epilog-lcssa: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]: +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -181,144 +182,145 @@ for.end: ; Tests for(i) { sum = A[i]; for(j) sum += B[j]; A[i] = sum; } ; A[i] load/store dependency should not block unroll-and-jam define void @test2(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 { -; CHECK-LABEL: @test2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0 -; CHECK-NEXT: [[CMP125:%.*]] = icmp ne i32 [[I:%.*]], 0 +; CHECK-LABEL: define void @test2( +; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0 +; CHECK-NEXT: [[CMP125:%.*]] = icmp ne i32 [[I]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP125]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END10:%.*]] -; CHECK: for.outer.preheader: +; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_OUTER_PREHEADER:.*]], label %[[FOR_END10:.*]] +; CHECK: [[FOR_OUTER_PREHEADER]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1 ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END10_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]] -; CHECK: 
for.outer.preheader.new: +; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]] +; CHECK: [[FOR_OUTER_PREHEADER_NEW]]: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]] -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD9_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD9_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9:%.*]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_1:%.*]] = add nuw nsw i32 [[I]], 2 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_2:%.*]] = add nuw nsw i32 [[I]], 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_2]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_3]] = add nuw i32 [[I]], 4 ; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4 -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP2]], [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[TMP3]], [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ [[TMP4]], [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ [[TMP5]], [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP2]], %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[TMP3]], %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ [[TMP4]], %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: 
[[SUM_3:%.*]] = phi i32 [ [[TMP5]], %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD]] = add i32 [[TMP6]], [[SUM]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1 ; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_1]] = add i32 [[TMP7]], [[SUM_1]] ; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1 ; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_2]] = add i32 [[TMP8]], [[SUM_2]] ; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1 ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_3]] = add i32 [[TMP9]], [[SUM_3]] ; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1 ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]] -; CHECK: for.latch: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] -; 
CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ] +; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]] -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: for.end10.loopexit.unr-lcssa.loopexit: -; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD9_3]], [[FOR_LATCH]] ] -; CHECK-NEXT: br label [[FOR_END10_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.end10.loopexit.unr-lcssa: -; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_END10_LOOPEXIT_UNR_LCSSA]] +; CHECK: [[FOR_END10_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[I_UNR:%.*]] = phi 
i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END10_LOOPEXIT:%.*]] -; CHECK: for.outer.epil.preheader: -; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]] -; CHECK: for.outer.epil: +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END10_LOOPEXIT:.*]] +; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] +; CHECK: [[FOR_OUTER_EPIL]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]] -; CHECK: for.inner.epil: -; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ [[TMP10]], [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] +; CHECK: [[FOR_INNER_EPIL]]: +; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ [[TMP10]], %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[TMP11]], [[SUM_EPIL]] ; CHECK-NEXT: [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label 
[[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]] -; CHECK: for.latch.epil: -; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]] +; CHECK: [[FOR_LATCH_EPIL]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1 ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA:%.*]] -; CHECK: for.outer.epil.1: +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA:.*]] +; CHECK: [[FOR_OUTER_EPIL_1]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_EPIL]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]] -; CHECK: for.inner.epil.1: -; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ [[TMP12]], [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]] +; CHECK: [[FOR_INNER_EPIL_1]]: +; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ [[TMP12]], %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 
[[J_EPIL_1]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_1]] = add i32 [[TMP13]], [[SUM_EPIL_1]] ; CHECK-NEXT: [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]] -; CHECK: for.latch.epil.1: -; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]] +; CHECK: [[FOR_LATCH_EPIL_1]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2 ; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.outer.epil.2: +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_OUTER_EPIL_2]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_EPIL_1]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]] -; CHECK: for.inner.epil.2: -; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ [[TMP14]], [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]] +; CHECK: [[FOR_INNER_EPIL_2]]: +; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ [[TMP14]], %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_2]] = add i32 [[TMP15]], [[SUM_EPIL_2]] ; CHECK-NEXT: [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]] -; CHECK: for.latch.epil.2: -; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.end10.loopexit.epilog-lcssa: -; CHECK-NEXT: br label [[FOR_END10_LOOPEXIT]] -; CHECK: for.end10.loopexit: -; CHECK-NEXT: br label [[FOR_END10]] -; CHECK: for.end10: +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]] +; CHECK: [[FOR_LATCH_EPIL_2]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]: +; CHECK-NEXT: br label %[[FOR_END10_LOOPEXIT]] +; CHECK: [[FOR_END10_LOOPEXIT]]: +; CHECK-NEXT: br label 
%[[FOR_END10]] +; CHECK: [[FOR_END10]]: ; CHECK-NEXT: ret void ; entry: @@ -363,61 +365,62 @@ for.end10: ; Tests Complete unroll-and-jam of the outer loop define void @test3(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 { -; CHECK-LABEL: @test3( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_PREHEADER:%.*]] -; CHECK: for.preheader: -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] +; CHECK-LABEL: define void @test3( +; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_PREHEADER:.*]] +; CHECK: [[FOR_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: 
[[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[SUM]], 10 ; CHECK-NEXT: [[ADD]] = sub i32 [[SUB]], [[TMP0]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[SUB_1:%.*]] = add i32 [[SUM_1]], 10 ; CHECK-NEXT: [[ADD_1]] = sub i32 [[SUB_1]], [[TMP1]] ; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[SUB_2:%.*]] = add i32 [[SUM_2]], 10 ; CHECK-NEXT: [[ADD_2]] = sub i32 [[SUB_2]], [[TMP2]] ; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]] -; CHECK-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[SUB_3:%.*]] = add i32 [[SUM_3]], 10 ; CHECK-NEXT: [[ADD_3]] = sub i32 [[SUB_3]], [[TMP3]] ; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1 ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH:%.*]], label [[FOR_INNER]] -; CHECK: for.latch: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH:.*]], label %[[FOR_INNER]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ] +; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1 -; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2 -; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 3 -; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, 
!tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: +; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -456,31 +459,32 @@ for.end: ; Tests Complete unroll-and-jam with a trip count of 1 define void @test4(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 { -; CHECK-LABEL: @test4( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_PREHEADER:%.*]] -; CHECK: for.preheader: -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] +; CHECK-LABEL: define void @test4( +; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_PREHEADER:.*]] +; CHECK: [[FOR_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[B]], i32 [[J]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[SUM]], 10 ; CHECK-NEXT: [[ADD]] = sub i32 [[SUB]], [[TMP0]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_LATCH:%.*]], label [[FOR_INNER]] -; CHECK: for.latch: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_LATCH:.*]], label %[[FOR_INNER]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ] +; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -520,47 +524,47 @@ for.end: ; Multiple SubLoopBlocks @a = hidden global [1 x i32] zeroinitializer, align 4 define i32 @test5() #0 { -; CHECK-LABEL: @test5( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[INC8_SINK15:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC8:%.*]], [[FOR_INC_1:%.*]] ] -; CHECK-NEXT: [[INC8_SINK15_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC8_1:%.*]], [[FOR_INC_1]] ] -; CHECK-NEXT: br label [[FOR_INNER2:%.*]] -; CHECK: for.inner2: +; CHECK-LABEL: define i32 @test5() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[INC8_SINK15:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], 
[ [[INC8:%.*]], %[[FOR_INC_1:.*]] ] +; CHECK-NEXT: [[INC8_SINK15_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC8_1:%.*]], %[[FOR_INC_1]] ] +; CHECK-NEXT: br label %[[FOR_INNER2:.*]] +; CHECK: [[FOR_INNER2]]: ; CHECK-NEXT: [[L1:%.*]] = load i32, ptr @a, align 4 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[L1]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND4:%.*]], label [[FOR_INC:%.*]] -; CHECK: for.cond4: +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[FOR_COND4:.*]], label %[[FOR_INC:.*]] +; CHECK: [[FOR_COND4]]: ; CHECK-NEXT: [[L0:%.*]] = load i32, ptr getelementptr inbounds ([1 x i32], ptr @a, i32 1, i32 0), align 4 ; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[L0]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_1]], label [[FOR_COND4A:%.*]], label [[FOR_INC]] -; CHECK: for.cond4a: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: +; CHECK-NEXT: br i1 [[TOBOOL_1]], label %[[FOR_COND4A:.*]], label %[[FOR_INC]] +; CHECK: [[FOR_COND4A]]: +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: ; CHECK-NEXT: [[INC8]] = add nuw nsw i32 [[INC8_SINK15]], 1 ; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr @a, align 4 ; CHECK-NEXT: [[TOBOOL_11:%.*]] = icmp eq i32 [[L1_1]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_11]], label [[FOR_COND4_1:%.*]], label [[FOR_INC_1]] -; CHECK: for.latch: -; CHECK-NEXT: [[DOTLCSSA_1:%.*]] = phi i32 [ [[L2_1:%.*]], [[FOR_INC_1]] ] -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_1]], [[FOR_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[TOBOOL_11]], label %[[FOR_COND4_1:.*]], label %[[FOR_INC_1]] +; CHECK: [[FOR_LATCH:.*]]: +; CHECK-NEXT: [[DOTLCSSA_1:%.*]] = phi i32 [ [[L2_1:%.*]], %[[FOR_INC_1]] ] +; CHECK-NEXT: br label %[[FOR_END:.*]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_1]], %[[FOR_LATCH]] ] ; CHECK-NEXT: ret i32 0 -; CHECK: for.cond4.1: +; CHECK: [[FOR_COND4_1]]: ; CHECK-NEXT: [[L0_1:%.*]] = load i32, ptr getelementptr inbounds ([1 x i32], ptr @a, 
i32 1, i32 0), align 4 ; CHECK-NEXT: [[TOBOOL_1_1:%.*]] = icmp eq i32 [[L0_1]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_1_1]], label [[FOR_COND4A_1:%.*]], label [[FOR_INC_1]] -; CHECK: for.cond4a.1: -; CHECK-NEXT: br label [[FOR_INC_1]] -; CHECK: for.inc.1: -; CHECK-NEXT: [[L2_1]] = phi i32 [ 0, [[FOR_INC]] ], [ 1, [[FOR_COND4_1]] ], [ 2, [[FOR_COND4A_1]] ] +; CHECK-NEXT: br i1 [[TOBOOL_1_1]], label %[[FOR_COND4A_1:.*]], label %[[FOR_INC_1]] +; CHECK: [[FOR_COND4A_1]]: +; CHECK-NEXT: br label %[[FOR_INC_1]] +; CHECK: [[FOR_INC_1]]: +; CHECK-NEXT: [[L2_1]] = phi i32 [ 0, %[[FOR_INC]] ], [ 1, %[[FOR_COND4_1]] ], [ 2, %[[FOR_COND4A_1]] ] ; CHECK-NEXT: [[INC8_1]] = add nuw nsw i32 [[INC8_SINK15_1]], 1 ; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC8_1]], 3 -; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_LATCH]], label [[FOR_INNER]] +; CHECK-NEXT: br i1 [[EXITCOND_1]], label %[[FOR_LATCH]], label %[[FOR_INNER]] ; entry: br label %for.outer @@ -608,57 +612,57 @@ for.end: ; Test odd uses of phi nodes @f = hidden global i32 0, align 4 define i32 @test6() #0 { -; CHECK-LABEL: @test6( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[F_PROMOTED10:%.*]] = load i32, ptr @f, align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br i1 false, label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] -; CHECK: entry.new: -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: [[INC5_SINK9:%.*]] = phi i32 [ 2, [[ENTRY_NEW]] ], [ [[INC5_3:%.*]], [[FOR_LATCH:%.*]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ] +; CHECK-LABEL: define i32 @test6() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[F_PROMOTED10:%.*]] = load i32, ptr @f, align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br i1 false, label %[[FOR_END_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: [[INC5_SINK9:%.*]] = phi i32 [ 2, %[[ENTRY_NEW]] ], [ [[INC5_3:%.*]], %[[FOR_LATCH:.*]] 
] +; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ] ; CHECK-NEXT: [[INC5_3]] = add nuw nsw i32 [[INC5_SINK9]], 4 ; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw nsw i32 [[NITER]], 4 -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[INC_SINK8:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[INC_SINK8_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[INC_SINK8_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[INC_SINK8_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ] +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[INC_SINK8:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[INC_SINK8_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[INC_SINK8_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[INC_SINK8_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[INC_SINK8]], 1 ; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[INC_SINK8_1]], 1 ; CHECK-NEXT: [[INC_2]] = add nuw nsw i32 [[INC_SINK8_2]], 1 ; CHECK-NEXT: [[INC_3]] = add nuw nsw i32 [[INC_SINK8_3]], 1 ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp ne i32 [[INC_3]], 7 -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_INNER]], label [[FOR_LATCH]] -; CHECK: for.latch: -; CHECK-NEXT: br i1 false, label [[FOR_OUTER]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.end.unr-lcssa.loopexit: -; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 2, [[FOR_LATCH]] ] -; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 7, [[FOR_LATCH]] ] -; CHECK-NEXT: [[P0_UNR_PH:%.*]] = phi i32 [ 2, [[FOR_LATCH]] ] -; CHECK-NEXT: br label [[FOR_END_UNR_LCSSA]] -; CHECK: 
for.end.unr-lcssa: -; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[DOTLCSSA_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[INC_LCSSA_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], [[ENTRY]] ], [ [[P0_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: br i1 true, label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.outer.epil.preheader: -; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]] -; CHECK: for.outer.epil: -; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]] -; CHECK: for.inner.epil: -; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_UNR]], [[FOR_OUTER_EPIL]] ], [ 2, [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: [[INC_SINK8_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_INNER]], label %[[FOR_LATCH]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA_LOOPEXIT:.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[FOR_END_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[P0_UNR_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_END_UNR_LCSSA]] +; CHECK: [[FOR_END_UNR_LCSSA]]: +; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[DOTLCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[INC_LCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: br i1 true, label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK: 
[[FOR_OUTER_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] +; CHECK: [[FOR_OUTER_EPIL]]: +; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] +; CHECK: [[FOR_INNER_EPIL]]: +; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_UNR]], %[[FOR_OUTER_EPIL]] ], [ 2, %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[INC_SINK8_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[INC_EPIL]] = add nuw nsw i32 [[INC_SINK8_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp ne i32 [[INC_EPIL]], 7 -; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_INNER_EPIL]], label [[FOR_LATCH_EPIL:%.*]] -; CHECK: for.latch.epil: -; CHECK-NEXT: [[DOTLCSSA_EPIL:%.*]] = phi i32 [ [[P1_EPIL]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: -; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], [[FOR_LATCH_EPIL]] ] -; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ 7, [[FOR_LATCH_EPIL]] ] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_INNER_EPIL]], label %[[FOR_LATCH_EPIL:.*]] +; CHECK: [[FOR_LATCH_EPIL]]: +; CHECK-NEXT: [[DOTLCSSA_EPIL:%.*]] = phi i32 [ [[P1_EPIL]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ] +; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ] ; CHECK-NEXT: ret i32 0 ; entry: @@ -693,159 +697,160 @@ for.end: ; Has a positive dependency between two stores. Still valid. 
; The negative dependecy is in unroll-and-jam-disabled.ll define void @test7(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 { -; CHECK-LABEL: @test7( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0 -; CHECK-NEXT: [[CMP128:%.*]] = icmp ne i32 [[I:%.*]], 0 +; CHECK-LABEL: define void @test7( +; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0 +; CHECK-NEXT: [[CMP128:%.*]] = icmp ne i32 [[I]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP128]], [[CMP]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.preheader: +; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_PREHEADER]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1 ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_PREHEADER_NEW:%.*]] -; CHECK: for.preheader.new: +; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_PREHEADER_NEW:.*]] +; CHECK: [[FOR_PREHEADER_NEW]]: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]] -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_PREHEADER_NEW]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_PREHEADER_NEW]] ] +; 
CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[I]], 2 ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[I]], 3 ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_3]] = add nuw i32 [[I]], 4 ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]] -; 
CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4 -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.latch: -; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]] -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: for.inner: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9]], 
[[FOR_INNER]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_1]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_2]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_3]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_1]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_2]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_3]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]] +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9]] = add i32 [[L1]], [[SUM]] ; CHECK-NEXT: [[ADD10]] = add nuw 
i32 [[J]], 1 ; CHECK-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]] -; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX7_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX7_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_1]] = add i32 [[L1_1]], [[SUM_1]] ; CHECK-NEXT: [[ADD10_1]] = add nuw i32 [[J_1]], 1 ; CHECK-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]] -; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX7_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX7_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_2]] = add i32 [[L1_2]], [[SUM_2]] ; CHECK-NEXT: [[ADD10_2]] = add nuw i32 [[J_2]], 1 ; CHECK-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]] -; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX7_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX7_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_3]] = add i32 [[L1_3]], [[SUM_3]] ; CHECK-NEXT: [[ADD10_3]] = add nuw i32 [[J_3]], 1 ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[ADD10_3]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]] -; CHECK: for.end.loopexit.unr-lcssa.loopexit: -; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], [[FOR_LATCH]] ] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.end.loopexit.unr-lcssa: -; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER]] ], [ [[I_UNR_PH]], 
%[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.outer.epil.preheader: -; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]] -; CHECK: for.outer.epil: +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]] +; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] +; CHECK: [[FOR_OUTER_EPIL]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1 ; CHECK-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]] -; CHECK: for.inner.epil: -; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] +; CHECK: [[FOR_INNER_EPIL]]: +; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[ARRAYIDX7_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]] -; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: 
[[ADD9_EPIL]] = add i32 [[L1_EPIL]], [[SUM_EPIL]] ; CHECK-NEXT: [[ADD10_EPIL]] = add nuw i32 [[J_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[ADD10_EPIL]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]] -; CHECK: for.latch.epil: -; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]] +; CHECK: [[FOR_LATCH_EPIL]]: +; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]] -; CHECK: for.outer.epil.1: +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]] +; CHECK: [[FOR_OUTER_EPIL_1]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2 ; CHECK-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL_1]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]] -; CHECK: for.inner.epil.1: -; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: store i32 2, ptr 
[[ARRAYIDX2_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]] +; CHECK: [[FOR_INNER_EPIL_1]]: +; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] ; CHECK-NEXT: [[ARRAYIDX7_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]] -; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_EPIL_1]] = add i32 [[L1_EPIL_1]], [[SUM_EPIL_1]] ; CHECK-NEXT: [[ADD10_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[ADD10_EPIL_1]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]] -; CHECK: for.latch.epil.1: -; CHECK-NEXT: [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]] +; CHECK: [[FOR_LATCH_EPIL_1]]: +; CHECK-NEXT: [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.outer.epil.2: +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_OUTER_EPIL_2]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]] -; CHECK-NEXT: store 
i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add nuw i32 [[I_UNR]], 3 ; CHECK-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_2]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]] -; CHECK: for.inner.epil.2: -; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]] +; CHECK: [[FOR_INNER_EPIL_2]]: +; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] ; CHECK-NEXT: [[ARRAYIDX7_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]] -; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_EPIL_2]] = add i32 [[L1_EPIL_2]], [[SUM_EPIL_2]] ; CHECK-NEXT: [[ADD10_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[ADD10_EPIL_2]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]] -; CHECK: for.latch.epil.2: -; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: 
for.end.loopexit.epilog-lcssa: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]] +; CHECK: [[FOR_LATCH_EPIL_2]]: +; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]: +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -888,166 +893,167 @@ for.end: ; Same as test7 with an extra outer loop nest define void @test8(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocapture readonly %B) #0 { -; CHECK-LABEL: @test8( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E:%.*]], 0 -; CHECK-NEXT: [[CMP336:%.*]] = icmp eq i32 [[I:%.*]], 0 +; CHECK-LABEL: define void @test8( +; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[E]], 0 +; CHECK-NEXT: [[CMP336:%.*]] = icmp eq i32 [[I]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP336]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_END:%.*]], label [[FOR_PREHEADER:%.*]] -; CHECK: for.preheader: +; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_END:.*]], label %[[FOR_PREHEADER:.*]] +; CHECK: [[FOR_PREHEADER]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1 -; CHECK-NEXT: br label [[FOR_OUTEST:%.*]] -; CHECK: for.outest: -; CHECK-NEXT: [[X_038:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_CLEANUP:%.*]] ], [ 0, [[FOR_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_OUTEST:.*]] +; CHECK: [[FOR_OUTEST]]: +; CHECK-NEXT: [[X_038:%.*]] = phi 
i32 [ [[INC:%.*]], %[[FOR_CLEANUP:.*]] ], [ 0, %[[FOR_PREHEADER]] ] ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_CLEANUP_UNR_LCSSA:%.*]], label [[FOR_OUTEST_NEW:%.*]] -; CHECK: for.outest.new: +; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_CLEANUP_UNR_LCSSA:.*]], label %[[FOR_OUTEST_NEW:.*]] +; CHECK: [[FOR_OUTEST_NEW]]: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]] -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTEST_NEW]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTEST_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTEST_NEW]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTEST_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[I]], 2 ; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], 
i32 [[ADD_1]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[I]], 3 ; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_3]] = add nuw i32 [[I]], 4 ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4 -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9_3:%.*]], [[FOR_INNER]] ] -; 
CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD10_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[J]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]] +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9]] = add i32 [[L1]], [[SUM]] ; CHECK-NEXT: [[ADD10]] = add nuw i32 [[J]], 1 ; CHECK-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]] -; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_1]] = add i32 [[L1_1]], [[SUM_1]] ; CHECK-NEXT: [[ADD10_1]] = add nuw i32 [[J_1]], 1 ; CHECK-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]] -; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[INT_TBAA0]] 
; CHECK-NEXT: [[ADD9_2]] = add i32 [[L1_2]], [[SUM_2]] ; CHECK-NEXT: [[ADD10_2]] = add nuw i32 [[J_2]], 1 ; CHECK-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]] -; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_3]] = add i32 [[L1_3]], [[SUM_3]] ; CHECK-NEXT: [[ADD10_3]] = add nuw i32 [[J_3]], 1 ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[ADD10_3]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]] -; CHECK: for.latch: -; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3]], [[FOR_INNER]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_INNER]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: store i32 
[[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]] -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: for.cleanup.unr-lcssa.loopexit: -; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], [[FOR_LATCH]] ] -; CHECK-NEXT: br label [[FOR_CLEANUP_UNR_LCSSA]] -; CHECK: for.cleanup.unr-lcssa: -; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTEST]] ], [ [[I_UNR_PH]], [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_CLEANUP_UNR_LCSSA]] +; CHECK: [[FOR_CLEANUP_UNR_LCSSA]]: +; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTEST]] ], [ [[I_UNR_PH]], %[[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]] ] ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_CLEANUP]] -; CHECK: for.outer.epil.preheader: -; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]] -; CHECK: for.outer.epil: +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_CLEANUP]] +; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] +; CHECK: [[FOR_OUTER_EPIL]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1 ; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL]], align 4, 
!tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]] -; CHECK: for.inner.epil: -; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] +; CHECK: [[FOR_INNER_EPIL]]: +; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[ARRAYIDX11_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]] -; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_EPIL]] = add i32 [[L1_EPIL]], [[SUM_EPIL]] ; CHECK-NEXT: [[ADD10_EPIL]] = add nuw i32 [[J_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[ADD10_EPIL]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]] -; CHECK: for.latch.epil: -; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]] +; CHECK: [[FOR_LATCH_EPIL]]: +; CHECK-NEXT: [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_CLEANUP_EPILOG_LCSSA:%.*]] -; CHECK: for.outer.epil.1: +; 
CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_CLEANUP_EPILOG_LCSSA:.*]] +; CHECK: [[FOR_OUTER_EPIL_1]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2 ; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]] -; CHECK: for.inner.epil.1: -; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]] +; CHECK: [[FOR_INNER_EPIL_1]]: +; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] ; CHECK-NEXT: [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]] -; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_EPIL_1]] = add i32 [[L1_EPIL_1]], [[SUM_EPIL_1]] ; CHECK-NEXT: [[ADD10_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[ADD10_EPIL_1]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]] -; CHECK: for.latch.epil.1: -; CHECK-NEXT: 
[[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]] +; CHECK: [[FOR_LATCH_EPIL_1]]: +; CHECK-NEXT: [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_CLEANUP_EPILOG_LCSSA]] -; CHECK: for.outer.epil.2: +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_CLEANUP_EPILOG_LCSSA]] +; CHECK: [[FOR_OUTER_EPIL_2]]: ; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]] -; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add nuw i32 [[I_UNR]], 3 ; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_2]] -; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]] -; CHECK: for.inner.epil.2: -; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]] +; CHECK: [[FOR_INNER_EPIL_2]]: +; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, 
%[[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] ; CHECK-NEXT: [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]] -; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD9_EPIL_2]] = add i32 [[L1_EPIL_2]], [[SUM_EPIL_2]] ; CHECK-NEXT: [[ADD10_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[ADD10_EPIL_2]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]] -; CHECK: for.latch.epil.2: -; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_CLEANUP_EPILOG_LCSSA]] -; CHECK: for.cleanup.epilog-lcssa: -; CHECK-NEXT: br label [[FOR_CLEANUP]] -; CHECK: for.cleanup: +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]] +; CHECK: [[FOR_LATCH_EPIL_2]]: +; CHECK-NEXT: [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_CLEANUP_EPILOG_LCSSA]] +; CHECK: [[FOR_CLEANUP_EPILOG_LCSSA]]: +; CHECK-NEXT: br label %[[FOR_CLEANUP]] +; CHECK: [[FOR_CLEANUP]]: ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[X_038]], 1 ; CHECK-NEXT: [[EXITCOND41:%.*]] = icmp eq i32 [[INC]], 5 -; CHECK-NEXT: br i1 [[EXITCOND41]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_OUTEST]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: +; CHECK-NEXT: br i1 [[EXITCOND41]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_OUTEST]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: 
[[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -1099,144 +1105,145 @@ for.end: ; Same as test1 with tbaa, not noalias define void @test9(i32 %I, i32 %E, ptr nocapture %A, ptr nocapture readonly %B) #0 { -; CHECK-LABEL: @test9( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E:%.*]], 0 -; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I:%.*]], 0 +; CHECK-LABEL: define void @test9( +; CHECK-SAME: i32 [[I:%.*]], i32 [[E:%.*]], ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[E]], 0 +; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.outer.preheader: +; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_OUTER_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_OUTER_PREHEADER]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1 ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]] -; CHECK: for.outer.preheader.new: +; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]] +; CHECK: [[FOR_OUTER_PREHEADER_NEW]]: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]] -; CHECK-NEXT: br label [[FOR_OUTER:%.*]] -; CHECK: for.outer: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_OUTER:.*]] +; CHECK: [[FOR_OUTER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ], [ 
[[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ] ; CHECK-NEXT: [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[ADD8_1:%.*]] = add nuw nsw i32 [[I]], 2 ; CHECK-NEXT: [[ADD8_2:%.*]] = add nuw nsw i32 [[I]], 3 ; CHECK-NEXT: [[ADD8_3]] = add nuw i32 [[I]], 4 ; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4 -; CHECK-NEXT: br label [[FOR_INNER:%.*]] -; CHECK: for.inner: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[J]] -; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: br label %[[FOR_INNER:.*]] +; CHECK: [[FOR_INNER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], 
%[[FOR_INNER]] ] +; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J]] +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 4, !tbaa [[SHORT_TBAA10:![0-9]+]] ; CHECK-NEXT: [[SEXT:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[ADD]] = add i32 [[SEXT]], [[SUM]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_1]] -; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 4, !tbaa [[SHORT_TBAA10]] ; CHECK-NEXT: [[SEXT_1:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[ADD_1]] = add i32 [[SEXT_1]], [[SUM_1]] ; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_2]] -; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 4, !tbaa [[SHORT_TBAA10]] ; CHECK-NEXT: [[SEXT_2:%.*]] = sext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[ADD_2]] = add i32 [[SEXT_2]], [[SUM_2]] ; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_3]] -; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 4, !tbaa [[SHORT_TBAA10]] ; CHECK-NEXT: [[SEXT_3:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[ADD_3]] = add i32 [[SEXT_3]], [[SUM_3]] ; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1 ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]] -; CHECK: for.latch: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ 
[[ADD_1]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I]] -; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]] +; CHECK: [[FOR_LATCH]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ] +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_1]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_2]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]] -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: for.end.loopexit.unr-lcssa.loopexit: 
-; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.end.loopexit.unr-lcssa: -; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.outer.epil.preheader: -; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]] -; CHECK: for.outer.epil: -; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]] -; CHECK: for.inner.epil: -; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ] -; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]] +; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] +; CHECK: [[FOR_OUTER_EPIL]]: +; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] +; CHECK: [[FOR_INNER_EPIL]]: +; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL]] -; CHECK-NEXT: [[TMP6:%.*]] = 
load i16, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[SHORT_TBAA10]] ; CHECK-NEXT: [[SEXT_EPIL:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[SEXT_EPIL]], [[SUM_EPIL]] ; CHECK-NEXT: [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]] -; CHECK: for.latch.epil: -; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]] +; CHECK: [[FOR_LATCH_EPIL]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1 ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]] -; CHECK: for.outer.epil.1: -; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]] -; CHECK: for.inner.epil.1: -; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] -; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]] +; CHECK: [[FOR_OUTER_EPIL_1]]: +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_1:.*]] +; CHECK: [[FOR_INNER_EPIL_1]]: +; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ 
[[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ] ; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL_1]] -; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[SHORT_TBAA10]] ; CHECK-NEXT: [[SEXT_EPIL_1:%.*]] = sext i16 [[TMP7]] to i32 ; CHECK-NEXT: [[ADD_EPIL_1]] = add i32 [[SEXT_EPIL_1]], [[SUM_EPIL_1]] ; CHECK-NEXT: [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]] -; CHECK: for.latch.epil.1: -; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]] +; CHECK: [[FOR_LATCH_EPIL_1]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[ADD8_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2 ; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]] -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.outer.epil.2: -; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]] -; CHECK: for.inner.epil.2: -; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] -; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ 
[[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_OUTER_EPIL_2]]: +; CHECK-NEXT: br label %[[FOR_INNER_EPIL_2:.*]] +; CHECK: [[FOR_INNER_EPIL_2]]: +; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ] ; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL_2]] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[SHORT_TBAA10]] ; CHECK-NEXT: [[SEXT_EPIL_2:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[ADD_EPIL_2]] = add i32 [[SEXT_EPIL_2]], [[SUM_EPIL_2]] ; CHECK-NEXT: [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[E]] -; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]] -; CHECK: for.latch.epil.2: -; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ] +; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]] +; CHECK: [[FOR_LATCH_EPIL_2]]: +; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], %[[FOR_INNER_EPIL_2]] ] ; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_EPIL_1]] -; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.end.loopexit.epilog-lcssa: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: +; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], ptr 
[[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]] +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]: +; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -1284,114 +1291,115 @@ for.end: @g = common global %struct.a zeroinitializer, align 8 @c = common global [1 x i8] zeroinitializer, align 1 define signext i16 @test10(i32 %k) #0 { -; CHECK-LABEL: @test10( -; CHECK-NEXT: entry: +; CHECK-LABEL: define signext i16 @test10( +; CHECK-SAME: i32 [[K:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @c, align 1 ; CHECK-NEXT: [[TOBOOL9:%.*]] = icmp eq i8 [[TMP0]], 0 -; CHECK-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[K:%.*]], 0 -; CHECK-NEXT: br i1 false, label [[FOR_END26_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] -; CHECK: entry.new: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[STOREMERGE82:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INC25_3:%.*]], [[FOR_INC24:%.*]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_INC24]] ] +; CHECK-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[K]], 0 +; CHECK-NEXT: br i1 false, label %[[FOR_END26_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[STOREMERGE82:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[INC25_3:%.*]], %[[FOR_INC24:.*]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_INC24]] ] ; CHECK-NEXT: [[INC25_3]] = add nuw nsw i64 [[STOREMERGE82]], 4 ; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw nsw i64 [[NITER]], 4 -; CHECK-NEXT: br label [[FOR_BODY2:%.*]] -; CHECK: for.body2: -; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC:%.*]], [[FOR_INC21_3:%.*]] ] -; CHECK-NEXT: [[STOREMERGE_14:%.*]] = phi i64 [ 4, 
[[FOR_BODY]] ], [ [[DEC_1:%.*]], [[FOR_INC21_3]] ] -; CHECK-NEXT: [[STOREMERGE_25:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC_2:%.*]], [[FOR_INC21_3]] ] -; CHECK-NEXT: [[STOREMERGE_36:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC_3:%.*]], [[FOR_INC21_3]] ] -; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT:%.*]], label [[FOR_BODY2_SPLIT2:%.*]] -; CHECK: for.body2.split2: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21:%.*]], label [[FOR_INC21_IF:%.*]] -; CHECK: for.body2.split: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21]], label [[FOR_INC21_THEN:%.*]] -; CHECK: for.inc21.if: -; CHECK-NEXT: br label [[FOR_INC21]] -; CHECK: for.inc21.then: -; CHECK-NEXT: br label [[FOR_INC21]] -; CHECK: for.inc21: +; CHECK-NEXT: br label %[[FOR_BODY2:.*]] +; CHECK: [[FOR_BODY2]]: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC:%.*]], %[[FOR_INC21_3:.*]] ] +; CHECK-NEXT: [[STOREMERGE_14:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC_1:%.*]], %[[FOR_INC21_3]] ] +; CHECK-NEXT: [[STOREMERGE_25:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC_2:%.*]], %[[FOR_INC21_3]] ] +; CHECK-NEXT: [[STOREMERGE_36:%.*]] = phi i64 [ 4, %[[FOR_BODY]] ], [ [[DEC_3:%.*]], %[[FOR_INC21_3]] ] +; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT:.*]], label %[[FOR_BODY2_SPLIT2:.*]] +; CHECK: [[FOR_BODY2_SPLIT2]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21:.*]], label %[[FOR_INC21_IF:.*]] +; CHECK: [[FOR_BODY2_SPLIT]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21]], label %[[FOR_INC21_THEN:.*]] +; CHECK: [[FOR_INC21_IF]]: +; CHECK-NEXT: br label %[[FOR_INC21]] +; CHECK: [[FOR_INC21_THEN]]: +; CHECK-NEXT: br label %[[FOR_INC21]] +; CHECK: [[FOR_INC21]]: ; CHECK-NEXT: [[DEC]] = add nsw i64 [[STOREMERGE]], -1 -; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_1:%.*]], label [[FOR_BODY2_SPLIT2_1:%.*]] -; CHECK: for.inc24: -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], [[FOR_INC21_3]] ] -; CHECK-NEXT: br i1 
false, label [[FOR_BODY]], label [[FOR_END26_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: for.end26.unr-lcssa.loopexit: -; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ 0, [[FOR_INC24]] ] -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], [[FOR_INC24]] ] -; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 0, [[FOR_INC24]] ] -; CHECK-NEXT: br label [[FOR_END26_UNR_LCSSA]] -; CHECK: for.end26.unr-lcssa: -; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[DEC_LCSSA_LCSSA_PH_PH]], [[FOR_END26_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, [[ENTRY]] ], [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], [[FOR_END26_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], [[FOR_END26_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: br i1 true, label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_END26:%.*]] -; CHECK: for.body.epil.preheader: -; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]] -; CHECK: for.body.epil: -; CHECK-NEXT: br label [[FOR_BODY2_EPIL:%.*]] -; CHECK: for.body2.epil: -; CHECK-NEXT: [[STOREMERGE_EPIL:%.*]] = phi i64 [ 4, [[FOR_BODY_EPIL]] ], [ [[DEC_EPIL:%.*]], [[FOR_INC21_EPIL:%.*]] ] -; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_EPIL:%.*]], label [[FOR_BODY2_SPLIT2_EPIL:%.*]] -; CHECK: for.body2.split2.epil: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_EPIL]], label [[FOR_INC21_IF_EPIL:%.*]] -; CHECK: for.inc21.if.epil: -; CHECK-NEXT: br label [[FOR_INC21_EPIL]] -; CHECK: for.body2.split.epil: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_EPIL]], label [[FOR_INC21_THEN_EPIL:%.*]] -; CHECK: for.inc21.then.epil: -; CHECK-NEXT: br label [[FOR_INC21_EPIL]] -; CHECK: for.inc21.epil: -; CHECK-NEXT: [[STOREMERGE_4_EPIL:%.*]] = phi i64 [ 0, [[FOR_INC21_IF_EPIL]] ], [ 0, [[FOR_INC21_THEN_EPIL]] ], [ 4, 
[[FOR_BODY2_SPLIT2_EPIL]] ], [ 4, [[FOR_BODY2_SPLIT_EPIL]] ] +; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_1:.*]], label %[[FOR_BODY2_SPLIT2_1:.*]] +; CHECK: [[FOR_INC24]]: +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], %[[FOR_INC21_3]] ] +; CHECK-NEXT: br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA_LOOPEXIT:.*]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[FOR_END26_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ] +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ] +; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 0, %[[FOR_INC24]] ] +; CHECK-NEXT: br label %[[FOR_END26_UNR_LCSSA]] +; CHECK: [[FOR_END26_UNR_LCSSA]]: +; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[DEC_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: br i1 true, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_END26:.*]] +; CHECK: [[FOR_BODY_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY_EPIL:.*]] +; CHECK: [[FOR_BODY_EPIL]]: +; CHECK-NEXT: br label %[[FOR_BODY2_EPIL:.*]] +; CHECK: [[FOR_BODY2_EPIL]]: +; CHECK-NEXT: [[STOREMERGE_EPIL:%.*]] = phi i64 [ 4, %[[FOR_BODY_EPIL]] ], [ [[DEC_EPIL:%.*]], %[[FOR_INC21_EPIL:.*]] ] +; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_EPIL:.*]], label %[[FOR_BODY2_SPLIT2_EPIL:.*]] +; CHECK: [[FOR_BODY2_SPLIT2_EPIL]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_EPIL]], label %[[FOR_INC21_IF_EPIL:.*]] +; CHECK: [[FOR_INC21_IF_EPIL]]: +; CHECK-NEXT: br label %[[FOR_INC21_EPIL]] +; CHECK: 
[[FOR_BODY2_SPLIT_EPIL]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_EPIL]], label %[[FOR_INC21_THEN_EPIL:.*]] +; CHECK: [[FOR_INC21_THEN_EPIL]]: +; CHECK-NEXT: br label %[[FOR_INC21_EPIL]] +; CHECK: [[FOR_INC21_EPIL]]: +; CHECK-NEXT: [[STOREMERGE_4_EPIL:%.*]] = phi i64 [ 0, %[[FOR_INC21_IF_EPIL]] ], [ 0, %[[FOR_INC21_THEN_EPIL]] ], [ 4, %[[FOR_BODY2_SPLIT2_EPIL]] ], [ 4, %[[FOR_BODY2_SPLIT_EPIL]] ] ; CHECK-NEXT: [[DEC_EPIL]] = add nsw i64 [[STOREMERGE_EPIL]], -1 ; CHECK-NEXT: [[TOBOOL_EPIL:%.*]] = icmp eq i64 [[DEC_EPIL]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_EPIL]], label [[FOR_INC24_EPIL:%.*]], label [[FOR_BODY2_EPIL]] -; CHECK: for.inc24.epil: -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_EPIL:%.*]] = phi i64 [ [[STOREMERGE_4_EPIL]], [[FOR_INC21_EPIL]] ] -; CHECK-NEXT: br label [[FOR_END26]] -; CHECK: for.end26: -; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH]], [[FOR_END26_UNR_LCSSA]] ], [ 0, [[FOR_INC24_EPIL]] ] -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH]], [[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], [[FOR_INC24_EPIL]] ] -; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH]], [[FOR_END26_UNR_LCSSA]] ], [ 0, [[FOR_INC24_EPIL]] ] +; CHECK-NEXT: br i1 [[TOBOOL_EPIL]], label %[[FOR_INC24_EPIL:.*]], label %[[FOR_BODY2_EPIL]] +; CHECK: [[FOR_INC24_EPIL]]: +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_EPIL:%.*]] = phi i64 [ [[STOREMERGE_4_EPIL]], %[[FOR_INC21_EPIL]] ] +; CHECK-NEXT: br label %[[FOR_END26]] +; CHECK: [[FOR_END26]]: +; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, 
%[[FOR_INC24_EPIL]] ] ; CHECK-NEXT: store i64 [[DEC_LCSSA_LCSSA]], ptr @g, align 8 ; CHECK-NEXT: ret i16 0 -; CHECK: for.body2.split2.1: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_1:%.*]], label [[FOR_INC21_IF_1:%.*]] -; CHECK: for.inc21.if.1: -; CHECK-NEXT: br label [[FOR_INC21_1]] -; CHECK: for.body2.split.1: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_1]], label [[FOR_INC21_THEN_1:%.*]] -; CHECK: for.inc21.then.1: -; CHECK-NEXT: br label [[FOR_INC21_1]] -; CHECK: for.inc21.1: +; CHECK: [[FOR_BODY2_SPLIT2_1]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_1:.*]], label %[[FOR_INC21_IF_1:.*]] +; CHECK: [[FOR_INC21_IF_1]]: +; CHECK-NEXT: br label %[[FOR_INC21_1]] +; CHECK: [[FOR_BODY2_SPLIT_1]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_1]], label %[[FOR_INC21_THEN_1:.*]] +; CHECK: [[FOR_INC21_THEN_1]]: +; CHECK-NEXT: br label %[[FOR_INC21_1]] +; CHECK: [[FOR_INC21_1]]: ; CHECK-NEXT: [[DEC_1]] = add nsw i64 [[STOREMERGE_14]], -1 -; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_2:%.*]], label [[FOR_BODY2_SPLIT2_2:%.*]] -; CHECK: for.body2.split2.2: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_2:%.*]], label [[FOR_INC21_IF_2:%.*]] -; CHECK: for.inc21.if.2: -; CHECK-NEXT: br label [[FOR_INC21_2]] -; CHECK: for.body2.split.2: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_2]], label [[FOR_INC21_THEN_2:%.*]] -; CHECK: for.inc21.then.2: -; CHECK-NEXT: br label [[FOR_INC21_2]] -; CHECK: for.inc21.2: +; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_2:.*]], label %[[FOR_BODY2_SPLIT2_2:.*]] +; CHECK: [[FOR_BODY2_SPLIT2_2]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_2:.*]], label %[[FOR_INC21_IF_2:.*]] +; CHECK: [[FOR_INC21_IF_2]]: +; CHECK-NEXT: br label %[[FOR_INC21_2]] +; CHECK: [[FOR_BODY2_SPLIT_2]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_2]], label %[[FOR_INC21_THEN_2:.*]] +; CHECK: [[FOR_INC21_THEN_2]]: +; CHECK-NEXT: br label %[[FOR_INC21_2]] +; CHECK: [[FOR_INC21_2]]: ; 
CHECK-NEXT: [[DEC_2]] = add nsw i64 [[STOREMERGE_25]], -1 -; CHECK-NEXT: br i1 [[TOBOOL9]], label [[FOR_BODY2_SPLIT_3:%.*]], label [[FOR_BODY2_SPLIT2_3:%.*]] -; CHECK: for.body2.split2.3: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_3]], label [[FOR_INC21_IF_3:%.*]] -; CHECK: for.inc21.if.3: -; CHECK-NEXT: br label [[FOR_INC21_3]] -; CHECK: for.body2.split.3: -; CHECK-NEXT: br i1 [[TOBOOL13]], label [[FOR_INC21_3]], label [[FOR_INC21_THEN_3:%.*]] -; CHECK: for.inc21.then.3: -; CHECK-NEXT: br label [[FOR_INC21_3]] -; CHECK: for.inc21.3: -; CHECK-NEXT: [[STOREMERGE_4_3]] = phi i64 [ 0, [[FOR_INC21_IF_3]] ], [ 0, [[FOR_INC21_THEN_3]] ], [ 4, [[FOR_BODY2_SPLIT2_3]] ], [ 4, [[FOR_BODY2_SPLIT_3]] ] +; CHECK-NEXT: br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_3:.*]], label %[[FOR_BODY2_SPLIT2_3:.*]] +; CHECK: [[FOR_BODY2_SPLIT2_3]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_3]], label %[[FOR_INC21_IF_3:.*]] +; CHECK: [[FOR_INC21_IF_3]]: +; CHECK-NEXT: br label %[[FOR_INC21_3]] +; CHECK: [[FOR_BODY2_SPLIT_3]]: +; CHECK-NEXT: br i1 [[TOBOOL13]], label %[[FOR_INC21_3]], label %[[FOR_INC21_THEN_3:.*]] +; CHECK: [[FOR_INC21_THEN_3]]: +; CHECK-NEXT: br label %[[FOR_INC21_3]] +; CHECK: [[FOR_INC21_3]]: +; CHECK-NEXT: [[STOREMERGE_4_3]] = phi i64 [ 0, %[[FOR_INC21_IF_3]] ], [ 0, %[[FOR_INC21_THEN_3]] ], [ 4, %[[FOR_BODY2_SPLIT2_3]] ], [ 4, %[[FOR_BODY2_SPLIT_3]] ] ; CHECK-NEXT: [[DEC_3]] = add nsw i64 [[STOREMERGE_36]], -1 ; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i64 [[DEC_3]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_3]], label [[FOR_INC24]], label [[FOR_BODY2]] +; CHECK-NEXT: br i1 [[TOBOOL_3]], label %[[FOR_INC24]], label %[[FOR_BODY2]] ; entry: %0 = load i8, ptr @c, align 1 @@ -1451,3 +1459,19 @@ for.end26: !8 = !{!"Simple C/C++ TBAA"} !9 = !{!10, !10, i64 0} !10 = !{!"short", !7, i64 0} +;. 
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]} +; CHECK: [[META5]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META5]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]} +; CHECK: [[SHORT_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +; CHECK: [[META11]] = !{!"short", [[META2]], i64 0} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll index 8e3af54b770e8..4cff8753ba9b1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s ; This is a bugpoint reduction of a test from PR43582: @@ -12,31 +12,32 @@ target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 target triple = "x86_64-w64-windows-gnu" define void @cff_index_load_offsets(i1 %cond, i8 %x, ptr %p) #0 { -; CHECK-LABEL: @cff_index_load_offsets( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[FOR_BODY68:%.*]] -; CHECK: for.body68: -; CHECK-NEXT: [[P_359:%.*]] = phi ptr [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ null, [[IF_THEN]] ] -; CHECK-NEXT: [[CONV70:%.*]] = zext i8 
[[X:%.*]] to i32 +; CHECK-LABEL: define void @cff_index_load_offsets( +; CHECK-SAME: i1 [[COND:%.*]], i8 [[X:%.*]], ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[EXIT:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: br label %[[FOR_BODY68:.*]] +; CHECK: [[FOR_BODY68]]: +; CHECK-NEXT: [[P_359:%.*]] = phi ptr [ [[ADD_PTR86:%.*]], %[[FOR_BODY68]] ], [ null, %[[IF_THEN]] ] +; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32 ; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24 -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[CHAR_TBAA1:![0-9]+]] ; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16 ; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr undef, align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr undef, align 1, !tbaa [[CHAR_TBAA1]] ; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8 ; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]] ; CHECK-NEXT: [[CONV81:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[OR83:%.*]] = or i32 [[OR79]], [[CONV81]] -; CHECK-NEXT: store i32 [[OR83]], ptr undef, align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: store i32 [[OR83]], ptr undef, align 4, !tbaa [[LONG_TBAA4:![0-9]+]] ; CHECK-NEXT: [[ADD_PTR86]] = getelementptr inbounds i8, ptr [[P_359]], i64 4 ; CHECK-NEXT: [[CMP66:%.*]] = icmp ult ptr [[ADD_PTR86]], undef -; CHECK-NEXT: br i1 [[CMP66]], label [[FOR_BODY68]], label [[SW_EPILOG:%.*]] -; CHECK: sw.epilog: +; CHECK-NEXT: br i1 [[CMP66]], label %[[FOR_BODY68]], label %[[SW_EPILOG:.*]] +; CHECK: [[SW_EPILOG]]: ; CHECK-NEXT: unreachable -; CHECK: Exit: +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -80,3 +81,10 @@ attributes #0 = { "use-soft-float"="false" } !3 = !{!"Simple C/C++ TBAA"} !4 = !{!5, !5, i64 0} 
!5 = !{!"long", !2, i64 0} +;. +; CHECK: [[CHAR_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[LONG_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"long", [[META2]], i64 0} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index 994cd331c4194..8a48f997052f0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" @@ -7,35 +7,36 @@ target triple = "x86_64-unknown-linux-gnu" @jlplt_ijl_alloc_array_1d_10294_got = external dso_local local_unnamed_addr global ptr define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) local_unnamed_addr #0 { -; CHECK-LABEL: @japi1_vect_42283( -; CHECK-NEXT: iter.check: -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1:%.*]] to i64 +; CHECK-LABEL: define ptr addrspace(10) @japi1_vect_42283( +; CHECK-SAME: ptr readonly captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = load atomic ptr, ptr @jlplt_ijl_alloc_array_1d_10294_got unordered, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = tail call ptr addrspace(10) [[TMP3]](ptr addrspace(10) null, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(10), ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(10), ptr [[TMP0]], align 8, !tbaa [[JTBAA_VALUE_TBAA0:![0-9]+]] ; 
CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(10) [[TMP4]] to ptr addrspace(11) -; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(13), ptr addrspace(11) [[TMP6]], align 8, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(13), ptr addrspace(11) [[TMP6]], align 8, !tbaa [[JTBAA_ARRAYPTR_TBAA5:![0-9]+]] ; CHECK-NEXT: [[DOTELT:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 0 -; CHECK-NEXT: [[DOTUNPACK:%.*]] = load ptr addrspace(10), ptr addrspace(10) [[DOTELT]], align 8, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: [[DOTUNPACK:%.*]] = load ptr addrspace(10), ptr addrspace(10) [[DOTELT]], align 8, !tbaa [[JTBAA_IMMUT_TBAA8:![0-9]+]] ; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 1 -; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[TBAA8]] +; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[JTBAA_IMMUT_TBAA8]] ; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[TOP:%.*]] -; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[TOP:.*]] +; CHECK: [[TOP]]: ; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i64 [[TMP8]], 16 -; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: +; CHECK-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr 
addrspace(10)> [[BROADCAST_SPLATINSERT]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], splat (i64 4) @@ -43,31 +44,31 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10:![0-9]+]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x 
i1> splat (i1 true)), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 1 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] -; CHECK-NEXT: call void 
@llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: middle.block: +; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK1:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK1]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[L44:%.*]], label [[MIDDLE_BLOCK:%.*]] -; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[L44:.*]], label 
%[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[SCALAR_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP]] ] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[SCALAR_PH]], !prof [[PROF15:![0-9]+]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[TOP]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP8]], 4 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF4]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0 @@ -77,34 +78,34 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], -; CHECK-NEXT: br label [[L26:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT14:%.*]], [[L26]] ] -; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <4 x i64> [ [[INDUCTION]], [[SCALAR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[L26]] ] +; CHECK-NEXT: br label %[[L26:.*]] +; CHECK: [[L26]]: +; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDEX_NEXT14:%.*]], %[[L26]] ] +; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[SCALAR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], %[[L26]] ] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds { ptr 
addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT11]], <4 x ptr addrspace(13)> [[TMP28]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT11]], <4 x ptr addrspace(13)> [[TMP28]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT13]], <4 x ptr addrspace(13)> [[TMP29]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT13]], <4 x ptr addrspace(13)> [[TMP29]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND8]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[L26]], !llvm.loop [[LOOP15:![0-9]+]] -; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: br i1 [[TMP30]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[L26]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N15]], label [[L44]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: br label [[L27:%.*]] -; CHECK: L26: -; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], [[L27]] ] 
+; CHECK-NEXT: br i1 [[CMP_N15]], label %[[L44]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[L27:.*]] +; CHECK: [[L27]]: +; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], %[[L27]] ] ; CHECK-NEXT: [[DOTREPACK:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 0 -; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[DOTREPACK]], align 8, !tbaa [[TBAA10]] +; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[DOTREPACK]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[DOTREPACK4:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 1 -; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[DOTREPACK4]], align 8, !tbaa [[TBAA10]] +; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[DOTREPACK4]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[TMP27]] = add i64 [[VALUE_PHI5]], 1 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[VALUE_PHI5]], [[TMP2]] -; CHECK-NEXT: br i1 [[DOTNOT]], label [[L44]], label [[L27]], !llvm.loop [[LOOP16:![0-9]+]] -; CHECK: L44: +; CHECK-NEXT: br i1 [[DOTNOT]], label %[[L44]], label %[[L27]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: [[L44]]: ; CHECK-NEXT: ret ptr addrspace(10) null ; top: @@ -146,3 +147,23 @@ L44: ; preds = %L26 !9 = !{!"jtbaa_immut", !1, i64 0} !10 = !{!11, !11, i64 0} !11 = !{!"jtbaa_arraybuf", !2, i64 0} +;. 
+; CHECK: [[JTBAA_VALUE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"jtbaa_value", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"jtbaa_data", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"jtbaa", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"jtbaa"} +; CHECK: [[JTBAA_ARRAYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"jtbaa_arrayptr", [[META7:![0-9]+]], i64 0} +; CHECK: [[META7]] = !{!"jtbaa_array", [[META3]], i64 0} +; CHECK: [[JTBAA_IMMUT_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; CHECK: [[META9]] = !{!"jtbaa_immut", [[META1]], i64 0} +; CHECK: [[JTBAA_ARRAYBUF_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +; CHECK: [[META11]] = !{!"jtbaa_arraybuf", [[META2]], i64 0} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META13:![0-9]+]], [[META14:![0-9]+]]} +; CHECK: [[META13]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META14]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[PROF15]] = !{!"branch_weights", i32 4, i32 12} +; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META13]], [[META14]]} +; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META14]], [[META13]]} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 35f61b2aa838a..050243faa49f4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=loop-vectorize -S -o - | FileCheck %s ; RUN: opt < %s -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -o - | FileCheck --check-prefix=MAX-BW %s @@ -10,21 +10,22 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: norecurse nounwind readonly uwtable define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_unnamed_addr #0 { -; CHECK-LABEL: @matrix_row_col( -; CHECK-NEXT: iter.check: -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I:%.*]] to i64 -; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J:%.*]] to i64 -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP144:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP145:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP146:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP147:%.*]], [[VECTOR_BODY]] ] +; CHECK-LABEL: define i32 @matrix_row_col( +; CHECK-SAME: 
ptr readonly captures(none) [[DATA:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J]] to i64 +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]] +; CHECK: [[VECTOR_PH1]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -57,14 +58,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], 
i32 24 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[TBAA1:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] @@ -97,14 +98,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]] -; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP77:%.*]] = load i32, ptr 
[[TMP45]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP80:%.*]] = insertelement <8 x i32> poison, i32 [[TMP72]], i32 0 ; CHECK-NEXT: [[TMP81:%.*]] = insertelement <8 x i32> [[TMP80]], i32 [[TMP73]], i32 1 ; CHECK-NEXT: [[TMP82:%.*]] = insertelement <8 x i32> [[TMP81]], i32 [[TMP74]], i32 2 @@ -113,14 +114,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP85:%.*]] = insertelement <8 x i32> [[TMP84]], i32 [[TMP77]], i32 5 ; CHECK-NEXT: [[TMP86:%.*]] = insertelement <8 x i32> [[TMP85]], i32 [[TMP78]], i32 6 ; CHECK-NEXT: [[TMP87:%.*]] = insertelement <8 x i32> [[TMP86]], i32 [[TMP79]], i32 7 -; CHECK-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: 
[[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP96:%.*]] = insertelement <8 x i32> poison, i32 [[TMP88]], i32 0 ; CHECK-NEXT: [[TMP97:%.*]] = insertelement <8 x i32> [[TMP96]], i32 [[TMP89]], i32 1 ; CHECK-NEXT: [[TMP98:%.*]] = insertelement <8 x i32> [[TMP97]], i32 [[TMP90]], i32 2 @@ -129,14 +130,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP101:%.*]] = insertelement <8 x i32> [[TMP100]], i32 [[TMP93]], i32 5 ; CHECK-NEXT: [[TMP102:%.*]] = insertelement <8 x i32> [[TMP101]], i32 [[TMP94]], i32 6 ; CHECK-NEXT: [[TMP103:%.*]] = insertelement <8 x i32> [[TMP102]], i32 [[TMP95]], i32 7 -; CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], 
align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP112:%.*]] = insertelement <8 x i32> poison, i32 [[TMP104]], i32 0 ; CHECK-NEXT: [[TMP113:%.*]] = insertelement <8 x i32> [[TMP112]], i32 [[TMP105]], i32 1 ; CHECK-NEXT: [[TMP114:%.*]] = insertelement <8 x i32> [[TMP113]], i32 [[TMP106]], i32 2 @@ -145,14 +146,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP117:%.*]] = insertelement <8 x i32> [[TMP116]], i32 [[TMP109]], i32 5 ; CHECK-NEXT: [[TMP118:%.*]] = insertelement <8 x i32> [[TMP117]], i32 [[TMP110]], i32 6 ; CHECK-NEXT: [[TMP119:%.*]] = insertelement <8 x i32> [[TMP118]], i32 [[TMP111]], i32 7 -; CHECK-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa 
[[TBAA1]] -; CHECK-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP128:%.*]] = insertelement <8 x i32> poison, i32 [[TMP120]], i32 0 ; CHECK-NEXT: [[TMP129:%.*]] = insertelement <8 x i32> [[TMP128]], i32 [[TMP121]], i32 1 ; CHECK-NEXT: [[TMP130:%.*]] = insertelement <8 x i32> [[TMP129]], i32 [[TMP122]], i32 2 @@ -175,37 +176,37 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP147]] = add <8 x i32> [[TMP143]], [[TMP139]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP148:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 -; CHECK-NEXT: br i1 [[TMP148]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: middle.block: +; CHECK-NEXT: br i1 [[TMP148]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP145]], [[TMP144]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <8 x i32> [[TMP146]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <8 x i32> [[TMP147]], [[BIN_RDX7]] ; CHECK-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]]) -; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label 
[[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] +; CHECK-NEXT: br i1 false, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] ; CHECK-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], [[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 ; CHECK-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 ; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] -; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[TBAA1]] 
+; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] -; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP162:%.*]] = insertelement <4 x i32> poison, i32 [[TMP158]], i32 0 ; CHECK-NEXT: [[TMP163:%.*]] = insertelement <4 x i32> [[TMP162]], i32 [[TMP159]], i32 1 ; CHECK-NEXT: [[TMP164:%.*]] = insertelement <4 x i32> [[TMP163]], i32 [[TMP160]], i32 2 @@ -215,46 +216,47 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]] ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4 ; CHECK-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100 -; CHECK-NEXT: br i1 [[TMP169]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: br i1 [[TMP169]], label 
%[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: br label [[FOR_BODY1:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[FOR_BODY1:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY1]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ADD7_LCSSA]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ] -; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY1]] ] +; CHECK: [[FOR_BODY1]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY1]] ] +; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], 
%[[SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY1]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]] -; CHECK-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP151]], [[TMP150]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4 ; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] ; -; MAX-BW-LABEL: @matrix_row_col( -; MAX-BW-NEXT: iter.check: -; MAX-BW-NEXT: [[IDXPROM:%.*]] = sext i32 [[I:%.*]] to i64 -; MAX-BW-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J:%.*]] to i64 -; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; MAX-BW: vector.main.loop.iter.check: -; MAX-BW-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] -; MAX-BW: vector.ph: -; MAX-BW-NEXT: br label [[VECTOR_BODY:%.*]] -; MAX-BW: vector.body: -; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP144:%.*]], [[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP145:%.*]], [[VECTOR_BODY]] ] -; 
MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP146:%.*]], [[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP147:%.*]], [[VECTOR_BODY]] ] +; MAX-BW-LABEL: define i32 @matrix_row_col( +; MAX-BW-SAME: ptr readonly captures(none) [[DATA:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; MAX-BW-NEXT: [[ITER_CHECK:.*]]: +; MAX-BW-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 +; MAX-BW-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J]] to i64 +; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; MAX-BW: [[VECTOR_PH]]: +; MAX-BW-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]] +; MAX-BW: [[VECTOR_PH1]]: +; MAX-BW-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX-BW: [[VECTOR_BODY]]: +; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -287,14 +289,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]] +; MAX-BW-NEXT: 
[[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] ; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 ; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 ; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24 -; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[TBAA1:![0-9]+]] -; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] +; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] @@ -327,14 +329,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]] -; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: 
[[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP80:%.*]] = insertelement <8 x i32> poison, i32 [[TMP72]], i32 0 ; MAX-BW-NEXT: [[TMP81:%.*]] = insertelement <8 x i32> [[TMP80]], i32 [[TMP73]], i32 1 ; MAX-BW-NEXT: [[TMP82:%.*]] = insertelement <8 x i32> [[TMP81]], i32 [[TMP74]], i32 2 @@ -343,14 +345,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP85:%.*]] = insertelement <8 x i32> [[TMP84]], i32 [[TMP77]], i32 5 ; MAX-BW-NEXT: [[TMP86:%.*]] = insertelement <8 x i32> [[TMP85]], i32 [[TMP78]], i32 6 ; MAX-BW-NEXT: [[TMP87:%.*]] = insertelement <8 x i32> [[TMP86]], i32 [[TMP79]], i32 7 -; MAX-BW-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP89:%.*]] = load i32, ptr 
[[TMP49]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP96:%.*]] = insertelement <8 x i32> poison, i32 [[TMP88]], i32 0 ; MAX-BW-NEXT: [[TMP97:%.*]] = insertelement <8 x i32> [[TMP96]], i32 [[TMP89]], i32 1 ; MAX-BW-NEXT: [[TMP98:%.*]] = insertelement <8 x i32> [[TMP97]], i32 [[TMP90]], i32 2 @@ -359,14 +361,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP101:%.*]] = insertelement <8 x i32> [[TMP100]], i32 [[TMP93]], i32 5 ; MAX-BW-NEXT: [[TMP102:%.*]] = insertelement <8 x i32> [[TMP101]], i32 [[TMP94]], i32 6 ; MAX-BW-NEXT: [[TMP103:%.*]] = insertelement <8 x i32> [[TMP102]], i32 [[TMP95]], i32 7 -; MAX-BW-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa 
[[TBAA1]] -; MAX-BW-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP112:%.*]] = insertelement <8 x i32> poison, i32 [[TMP104]], i32 0 ; MAX-BW-NEXT: [[TMP113:%.*]] = insertelement <8 x i32> [[TMP112]], i32 [[TMP105]], i32 1 ; MAX-BW-NEXT: [[TMP114:%.*]] = insertelement <8 x i32> [[TMP113]], i32 [[TMP106]], i32 2 @@ -375,14 +377,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP117:%.*]] = insertelement <8 x i32> [[TMP116]], i32 [[TMP109]], i32 5 ; MAX-BW-NEXT: [[TMP118:%.*]] = insertelement <8 x i32> [[TMP117]], i32 [[TMP110]], i32 6 ; MAX-BW-NEXT: [[TMP119:%.*]] = insertelement <8 x i32> [[TMP118]], i32 [[TMP111]], i32 7 -; MAX-BW-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa 
[[TBAA1]] -; MAX-BW-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP128:%.*]] = insertelement <8 x i32> poison, i32 [[TMP120]], i32 0 ; MAX-BW-NEXT: [[TMP129:%.*]] = insertelement <8 x i32> [[TMP128]], i32 [[TMP121]], i32 1 ; MAX-BW-NEXT: [[TMP130:%.*]] = insertelement <8 x i32> [[TMP129]], i32 [[TMP122]], i32 2 @@ -405,37 +407,37 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP147]] = add <8 x i32> [[TMP143]], [[TMP139]] ; MAX-BW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; MAX-BW-NEXT: [[TMP148:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 -; MAX-BW-NEXT: br i1 [[TMP148]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; MAX-BW: middle.block: +; MAX-BW-NEXT: br i1 [[TMP148]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP5:![0-9]+]] +; MAX-BW: [[MIDDLE_BLOCK]]: ; MAX-BW-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP145]], [[TMP144]] ; MAX-BW-NEXT: [[BIN_RDX7:%.*]] = add <8 x i32> [[TMP146]], [[BIN_RDX]] ; MAX-BW-NEXT: [[BIN_RDX8:%.*]] = add <8 x i32> [[TMP147]], [[BIN_RDX7]] ; MAX-BW-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]]) -; MAX-BW-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; MAX-BW: vec.epilog.iter.check: -; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] -; MAX-BW: vec.epilog.ph: -; MAX-BW-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] -; MAX-BW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] +; MAX-BW-NEXT: br i1 false, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; MAX-BW: [[VEC_EPILOG_ITER_CHECK]]: +; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; MAX-BW: [[VEC_EPILOG_PH]]: +; MAX-BW-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] +; MAX-BW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] ; MAX-BW-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 -; MAX-BW-NEXT: br label [[FOR_BODY:%.*]] -; MAX-BW: vec.epilog.vector.body: -; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[FOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], [[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], [[FOR_BODY]] ] +; MAX-BW-NEXT: br label %[[FOR_BODY:.*]] +; MAX-BW: [[FOR_BODY]]: +; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[FOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ 
[[TMP168:%.*]], %[[FOR_BODY]] ] ; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 ; MAX-BW-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 ; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] -; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] -; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA1]] -; MAX-BW-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP162:%.*]] = insertelement <4 x i32> poison, i32 [[TMP158]], i32 0 ; MAX-BW-NEXT: [[TMP163:%.*]] = insertelement <4 x i32> [[TMP162]], i32 [[TMP159]], i32 1 ; MAX-BW-NEXT: [[TMP164:%.*]] = insertelement <4 x i32> [[TMP163]], i32 [[TMP160]], i32 2 @@ -445,30 +447,30 @@ define 
i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]] ; MAX-BW-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4 ; MAX-BW-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100 -; MAX-BW-NEXT: br i1 [[TMP169]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; MAX-BW: vec.epilog.middle.block: +; MAX-BW-NEXT: br i1 [[TMP169]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; MAX-BW: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAX-BW-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) -; MAX-BW-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] -; MAX-BW: vec.epilog.scalar.ph: -; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; MAX-BW-NEXT: br label [[FOR_BODY1:%.*]] -; MAX-BW: for.cond.cleanup: -; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; MAX-BW-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[SCALAR_PH]] +; MAX-BW: [[SCALAR_PH]]: +; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; MAX-BW-NEXT: br label %[[FOR_BODY1:.*]] +; MAX-BW: [[FOR_COND_CLEANUP]]: +; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY1]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; MAX-BW-NEXT: ret i32 
[[ADD7_LCSSA]] -; MAX-BW: for.body: -; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ] -; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY1]] ] +; MAX-BW: [[FOR_BODY1]]: +; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY1]] ] +; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY1]] ] ; MAX-BW-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]] -; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]] -; MAX-BW-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA1]] +; MAX-BW-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP151]], [[TMP150]] ; MAX-BW-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4 ; MAX-BW-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]] ; MAX-BW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; MAX-BW-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; MAX-BW-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] +; MAX-BW-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] ; entry: %idxprom = sext i32 %i to i64 @@ -496,13 +498,14 @@ entry: } define void @test(ptr %A, ptr noalias %B) #0 { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: 
vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 @@ -520,13 +523,13 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP5]], 0 ; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 0 ; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP7]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP8]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]] ; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i32> [[TMP18]] to <8 x i8> -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B:%.*]], i64 0, i64 [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP8]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP9]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP10]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr 
inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP11]] @@ -552,13 +555,13 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: store i8 [[TMP35]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 -; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 ; CHECK-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] @@ -571,17 +574,18 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; -; MAX-BW-LABEL: @test( -; MAX-BW-NEXT: entry: -; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; MAX-BW: vector.ph: -; MAX-BW-NEXT: br label [[VECTOR_BODY:%.*]] -; MAX-BW: 
vector.body: -; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; MAX-BW-LABEL: define void @test( +; MAX-BW-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; MAX-BW-NEXT: [[ENTRY:.*:]] +; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; MAX-BW: [[VECTOR_PH]]: +; MAX-BW-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX-BW: [[VECTOR_BODY]]: +; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 @@ -615,13 +619,13 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP13]], 0 ; MAX-BW-NEXT: [[TMP30:%.*]] = add nuw nsw i64 [[TMP14]], 0 ; MAX-BW-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[TMP15]], 0 -; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP16]] +; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP16]] ; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4 ; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> ; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> ; MAX-BW-NEXT: [[TMP34:%.*]] = add <16 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]] ; MAX-BW-NEXT: [[TMP35:%.*]] = trunc <16 x i32> [[TMP34]] to <16 x i8> -; MAX-BW-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B:%.*]], i64 0, i64 [[TMP16]] +; MAX-BW-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP16]] ; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP17]] ; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP18]] ; 
MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP19]] @@ -671,13 +675,13 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: store i8 [[TMP67]], ptr [[TMP51]], align 1 ; MAX-BW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; MAX-BW-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 -; MAX-BW-NEXT: br i1 [[TMP68]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; MAX-BW: middle.block: -; MAX-BW-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; MAX-BW: scalar.ph: -; MAX-BW-NEXT: br label [[FOR_BODY:%.*]] -; MAX-BW: for.body: -; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; MAX-BW-NEXT: br i1 [[TMP68]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; MAX-BW: [[MIDDLE_BLOCK]]: +; MAX-BW-NEXT: br label %[[FOR_COND_CLEANUP:.*]] +; MAX-BW: [[SCALAR_PH]]: +; MAX-BW-NEXT: br label %[[FOR_BODY:.*]] +; MAX-BW: [[FOR_BODY]]: +; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; MAX-BW-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 ; MAX-BW-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 ; MAX-BW-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] @@ -690,8 +694,8 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1 ; MAX-BW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2 ; MAX-BW-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024 -; MAX-BW-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] -; MAX-BW: for.cond.cleanup: +; MAX-BW-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] +; MAX-BW: [[FOR_COND_CLEANUP]]: ; MAX-BW-NEXT: ret void ; entry: @@ -733,3 +737,28 @@ attributes #0 = { "target-cpu"="core-avx2" "target-features"="+avx,+avx2,+sse,+s !2 = !{!"int", !3, i64 0} 
!3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]} +; CHECK: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]], [[META7]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META6]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META6]], [[META7]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META6]]} +;. +; MAX-BW: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; MAX-BW: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} +; MAX-BW: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; MAX-BW: [[META4]] = !{!"Simple C/C++ TBAA"} +; MAX-BW: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]} +; MAX-BW: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} +; MAX-BW: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} +; MAX-BW: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]], [[META7]]} +; MAX-BW: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META6]]} +; MAX-BW: [[LOOP10]] = distinct !{[[LOOP10]], [[META6]], [[META7]]} +; MAX-BW: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META6]]} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll index e629560354f2a..f86ad8fc88a01 100644 --- a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll +++ b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 ; RUN: opt -passes=loop-vectorize -force-vector-width=8 -S %s | FileCheck %s @postscale = external constant [64 x float] @@ -11,11 +11,11 @@ define void @test(ptr %data) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr getelementptr inbounds nuw (i8, ptr @postscale, i64 4), align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr getelementptr inbounds nuw (i8, ptr @postscale, i64 4), align 4, !tbaa [[FLOAT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP2]] to <8 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 -; CHECK-NEXT: store i16 [[TMP4]], ptr [[DATA]], align 2, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP4]], ptr [[DATA]], align 2, !tbaa [[SHORT_TBAA4:![0-9]+]] ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] @@ -25,10 +25,10 @@ define void @test(ptr %data) { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[OR_IV_1:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_POSTSCALE:%.*]] = getelementptr [64 x float], ptr @postscale, i64 0, i64 [[OR_IV_1]] -; 
CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: [[LRINT:%.*]] = tail call i64 @llvm.lrint.i64.f32(float [[LOAD_POSTSCALE]]) ; CHECK-NEXT: [[LRINT_TRUNC:%.*]] = trunc i64 [[LRINT]] to i16 -; CHECK-NEXT: store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[TBAA4]] +; CHECK-NEXT: store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[SHORT_TBAA4]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 8 ; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll index 54779ed55cff8..e487eac3fee05 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s ; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=2 | FileCheck --check-prefix=INTERLEAVE %s @@ -18,12 +18,12 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]] 
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 9.900000e+01), !fpmath [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp oge <2 x double> [[TMP3]], splat (double 1.000000e+01) ; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]] ; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP6]] to <2 x float>, !fpmath [[META3]] -; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -37,12 +37,12 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[L_1]], 9.900000e+01, !fpmath [[META3]] ; CHECK-NEXT: [[C:%.*]] = fcmp oge double [[ADD]], 1.000000e+01 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], double [[L_1]], double 0.000000e+00, !fpmath [[META3]] ; CHECK-NEXT: [[T:%.*]] = fptrunc double [[SEL]] to float, !fpmath [[META3]] -; CHECK-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[SIZE]] ; CHECK-NEXT: br i1 
[[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] @@ -63,8 +63,8 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 2 -; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 9.900000e+01), !fpmath [[META3:![0-9]+]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD1]], splat (double 9.900000e+01), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp oge <2 x double> [[TMP4]], splat (double 1.000000e+01) @@ -74,8 +74,8 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP11]] to <2 x float>, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = fptrunc <2 x double> [[TMP8]] to <2 x float>, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 2 -; INTERLEAVE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] -; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTERLEAVE-NEXT: 
[[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -89,12 +89,12 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] ; INTERLEAVE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: [[L_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[ADD:%.*]] = fadd double [[L_1]], 9.900000e+01, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[C:%.*]] = fcmp oge double [[ADD]], 1.000000e+01 ; INTERLEAVE-NEXT: [[SEL:%.*]] = select i1 [[C]], double [[L_1]], double 0.000000e+00, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[T:%.*]] = fptrunc double [[SEL]] to float, !fpmath [[META3]] -; INTERLEAVE-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: store float [[T]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[SIZE]] ; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] @@ -133,7 +133,7 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> 
@foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 4 @@ -147,7 +147,7 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !range [[RNG9:![0-9]+]] +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[CHAR_TBAA0]], !range [[RNG9:![0-9]+]] ; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 @@ -167,8 +167,8 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2 -; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD]]) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD1]]) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] @@ -185,7 +185,7 @@ define void @widen_call_range(ptr noalias %a, ptr 
readonly %b) { ; INTERLEAVE: [[LOOP]]: ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !range [[RNG9:![0-9]+]] +; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[CHAR_TBAA0]], !range [[RNG9:![0-9]+]] ; INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]] ; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; INTERLEAVE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 @@ -223,7 +223,7 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[TMP3]], align 8 @@ -237,7 +237,7 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds double, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 @@ -257,8 +257,8 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 -; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] @@ -275,7 +275,7 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[LOOP]]: ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]] ; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] ; INTERLEAVE-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 @@ -403,7 +403,7 @@ define void @widen_intrinsic_fpmath(ptr 
noalias %a, ptr readonly %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[TMP3]], align 8 @@ -417,7 +417,7 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 @@ -437,8 +437,8 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 -; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], 
align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] @@ -455,7 +455,7 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[LOOP]]: ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] +; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]] ; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] ; INTERLEAVE-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 @@ -608,7 +608,7 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_ !3 = !{!"omnipotent char", !2, i64 0} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0} +; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0} ; CHECK: [[META1]] = !{!"omnipotent char", [[META2]]} ; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"} ; CHECK: [[META3]] = !{float 2.500000e+00} @@ -628,7 +628,7 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_ ; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META5]], [[META6]]} ; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META6]], [[META5]]} ;. 
-; INTERLEAVE: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0} +; INTERLEAVE: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0} ; INTERLEAVE: [[META1]] = !{!"omnipotent char", [[META2]]} ; INTERLEAVE: [[META2]] = !{!"Simple C/C++ TBAA"} ; INTERLEAVE: [[META3]] = !{float 2.500000e+00} diff --git a/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll index 16ad4bfed0fd3..9f77bbfe5ac35 100644 --- a/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll +++ b/llvm/test/Transforms/LoopVersioning/add-phi-update-users.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=loop-versioning -S -o - | FileCheck %s ; This test case used to end like this: @@ -22,48 +22,48 @@ define void @f1() { ; CHECK-LABEL: define void @f1() { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[T0:%.*]] = load ptr, ptr @c, align 1 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[T0]], i64 2 -; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]] -; CHECK: for.body.lver.check: +; CHECK-NEXT: br label %[[FOR_BODY_LVER_CHECK:.*]] +; CHECK: [[FOR_BODY_LVER_CHECK]]: ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[T0]], getelementptr inbounds nuw (i8, ptr @b, i64 2) ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr @b, [[SCEVGEP]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] -; CHECK: for.body.ph.lver.orig: -; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] -; CHECK: for.body.lver.orig: -; CHECK-NEXT: [[T1_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: [[T2_LVER_ORIG:%.*]] = load i16, ptr @b, align 1, !tbaa 
[[TBAA2:![0-9]+]] -; CHECK-NEXT: store i16 [[T2_LVER_ORIG]], ptr [[T0]], align 1, !tbaa [[TBAA2]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[FOR_BODY_PH_LVER_ORIG:.*]], label %[[FOR_BODY_PH:.*]] +; CHECK: [[FOR_BODY_PH_LVER_ORIG]]: +; CHECK-NEXT: br label %[[FOR_BODY_LVER_ORIG:.*]] +; CHECK: [[FOR_BODY_LVER_ORIG]]: +; CHECK-NEXT: [[T1_LVER_ORIG:%.*]] = phi i64 [ 0, %[[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], %[[FOR_BODY_LVER_ORIG]] ] +; CHECK-NEXT: [[T2_LVER_ORIG:%.*]] = load i16, ptr @b, align 1, !tbaa [[LONG_LONG_TBAA2:![0-9]+]] +; CHECK-NEXT: store i16 [[T2_LVER_ORIG]], ptr [[T0]], align 1, !tbaa [[LONG_LONG_TBAA2]] ; CHECK-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[T1_LVER_ORIG]], 1 ; CHECK-NEXT: [[CMP_LVER_ORIG:%.*]] = icmp ult i64 [[INC_LVER_ORIG]], 3 -; CHECK-NEXT: br i1 [[CMP_LVER_ORIG]], label [[FOR_BODY_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.body.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[T1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[T2:%.*]] = load i16, ptr @b, align 1, !tbaa [[TBAA2]], !alias.scope [[META6:![0-9]+]] -; CHECK-NEXT: store i16 [[T2]], ptr [[T0]], align 1, !tbaa [[TBAA2]], !alias.scope [[META9:![0-9]+]], !noalias [[META6]] +; CHECK-NEXT: br i1 [[CMP_LVER_ORIG]], label %[[FOR_BODY_LVER_ORIG]], label %[[FOR_END_LOOPEXIT:.*]] +; CHECK: [[FOR_BODY_PH]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[T1:%.*]] = phi i64 [ 0, %[[FOR_BODY_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[T2:%.*]] = load i16, ptr @b, align 1, !tbaa [[LONG_LONG_TBAA2]], !alias.scope [[META6:![0-9]+]] +; CHECK-NEXT: store i16 [[T2]], ptr [[T0]], align 1, !tbaa [[LONG_LONG_TBAA2]], !alias.scope [[META9:![0-9]+]], !noalias [[META6]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[T1]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 3 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label 
[[FOR_END_LOOPEXIT1:%.*]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[T2_LVER_PH:%.*]] = phi i16 [ [[T2_LVER_ORIG]], [[FOR_BODY_LVER_ORIG]] ] -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end.loopexit1: -; CHECK-NEXT: [[T2_LVER_PH2:%.*]] = phi i16 [ [[T2]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: -; CHECK-NEXT: [[T2_LVER:%.*]] = phi i16 [ [[T2_LVER_PH]], [[FOR_END_LOOPEXIT]] ], [ [[T2_LVER_PH2]], [[FOR_END_LOOPEXIT1]] ] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END_LOOPEXIT1:.*]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: [[T2_LVER_PH:%.*]] = phi i16 [ [[T2_LVER_ORIG]], %[[FOR_BODY_LVER_ORIG]] ] +; CHECK-NEXT: br label %[[FOR_END:.*]] +; CHECK: [[FOR_END_LOOPEXIT1]]: +; CHECK-NEXT: [[T2_LVER_PH2:%.*]] = phi i16 [ [[T2]], %[[FOR_BODY]] ] +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[T2_LVER:%.*]] = phi i16 [ [[T2_LVER_PH]], %[[FOR_END_LOOPEXIT]] ], [ [[T2_LVER_PH2]], %[[FOR_END_LOOPEXIT1]] ] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[T2_LVER]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND_BACKEDGE:%.*]], label [[IF_THEN:%.*]] -; CHECK: for.cond.backedge: -; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK]] -; CHECK: if.then: -; CHECK-NEXT: store i16 [[T2_LVER]], ptr @a, align 1, !tbaa [[TBAA2]] -; CHECK-NEXT: br label [[FOR_COND_BACKEDGE]] +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[FOR_COND_BACKEDGE:.*]], label %[[IF_THEN:.*]] +; CHECK: [[FOR_COND_BACKEDGE]]: +; CHECK-NEXT: br label %[[FOR_BODY_LVER_CHECK]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i16 [[T2_LVER]], ptr @a, align 1, !tbaa [[LONG_LONG_TBAA2]] +; CHECK-NEXT: br label %[[FOR_COND_BACKEDGE]] ; entry: %t0 = load ptr, ptr @c, align 1 @@ -101,3 +101,14 @@ if.then: ; preds = %for.end !3 = !{!"long long", !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C/C++ TBAA"} +;. 
+; CHECK: [[LONG_LONG_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +; CHECK: [[META3]] = !{!"long long", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[META6]] = !{[[META7:![0-9]+]]} +; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} +; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"} +; CHECK: [[META9]] = !{[[META10:![0-9]+]]} +; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]} +;. diff --git a/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll b/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll index 33e37c97b7a0e..1dfdf09a26999 100644 --- a/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll +++ b/llvm/test/Transforms/MergedLoadStoreMotion/preserve-store-metadata.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=mldst-motion -S %s | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" @@ -10,7 +10,7 @@ define void @perserve_common_metadata(i1 %c, ptr %dst, ptr %min) { ; CHECK-NEXT: [[GEP_DST_16:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 16 ; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: br label %[[RETURN:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: [[GEP_DST_24:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 24 @@ -18,7 +18,7 @@ define void @perserve_common_metadata(i1 %c, ptr %dst, ptr %min) { ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: ; CHECK-NEXT: [[DOTSINK:%.*]] = phi ptr [ [[DST]], %[[THEN]] ], [ 
null, %[[ELSE]] ] -; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8, !tbaa [[TBAA4:![0-9]+]], !alias.scope [[META6:![0-9]+]], !noalias [[META6]], !llvm.access.group [[ACC_GRP9:![0-9]+]] +; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8, !tbaa [[LONG_TBAA4:![0-9]+]], !alias.scope [[META6:![0-9]+]], !noalias [[META6]], !llvm.access.group [[ACC_GRP9:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -47,7 +47,7 @@ define void @clear_different_metadata(i1 %c, ptr %dst, ptr %min) { ; CHECK-NEXT: [[GEP_DST_16:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 16 ; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: store ptr [[DST]], ptr [[MIN]], align 8, !tbaa [[_FOOPTR_TBAA10:![0-9]+]] ; CHECK-NEXT: br label %[[RETURN:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: [[GEP_DST_24:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 24 @@ -55,7 +55,7 @@ define void @clear_different_metadata(i1 %c, ptr %dst, ptr %min) { ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: ; CHECK-NEXT: [[DOTSINK:%.*]] = phi ptr [ [[DST]], %[[THEN]] ], [ null, %[[ELSE]] ] -; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8 +; CHECK-NEXT: store ptr [[DOTSINK]], ptr [[GEP_DST_16]], align 8, !tbaa [[CHAR_TBAA13:![0-9]+]], !alias.scope [[META6]], !noalias [[META6]] ; CHECK-NEXT: ret void ; entry: @@ -93,17 +93,18 @@ return: !13 = distinct !{} !14 = distinct !{} ;. 
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]]} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0, i64 0} +; CHECK: [[LONG_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0, i64 0} ; CHECK: [[META5]] = !{!"long", [[META2]]} ; CHECK: [[META6]] = !{[[META7:![0-9]+]]} ; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} ; CHECK: [[META8]] = distinct !{[[META8]]} ; CHECK: [[ACC_GRP9]] = distinct !{} -; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 0} +; CHECK: [[_FOOPTR_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 0} ; CHECK: [[META11]] = !{!"p2 _Foo", [[META12:![0-9]+]]} ; CHECK: [[META12]] = !{!"any pointer", [[META2]], i64 0} +; CHECK: [[CHAR_TBAA13]] = !{[[META2]], [[META2]], i64 0} ;. diff --git a/llvm/test/Transforms/NewGVN/memory-handling.ll b/llvm/test/Transforms/NewGVN/memory-handling.ll index bf07edf91f2ba..f83d145167c75 100644 --- a/llvm/test/Transforms/NewGVN/memory-handling.ll +++ b/llvm/test/Transforms/NewGVN/memory-handling.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ;; This test is really dependent on propagating a lot of memory info around, but in the end, not ;; screwing up a single add. 
; RUN: opt < %s -passes=newgvn -S | FileCheck %s @@ -26,114 +26,114 @@ define void @BuildMask(ptr nocapture readonly) local_unnamed_addr #0 { ; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 @alPhrase, i8 0, i64 416, i1 false) ; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainMask, i8 0, i64 16, i1 false) ; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 @aqMainSign, i8 0, i64 16, i1 false) -; CHECK-NEXT: br label [[DOTSINK_SPLIT:%.*]] -; CHECK: .sink.split: -; CHECK-NEXT: [[DOT0:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP3:%.*]], [[TMP14:%.*]] ] -; CHECK-NEXT: [[DOTSINK:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP22:%.*]], [[TMP14]] ] -; CHECK-NEXT: store i32 [[DOTSINK]], ptr @cchPhraseLength, align 4, !tbaa [[TBAA1:![0-9]+]] -; CHECK-NEXT: br label [[TMP2:%.*]] -; CHECK: 2: -; CHECK-NEXT: [[DOT1:%.*]] = phi ptr [ [[DOT0]], [[DOTSINK_SPLIT]] ], [ [[TMP3]], [[TMP6:%.*]] ] +; CHECK-NEXT: br label %[[DOTSINK_SPLIT:.*]] +; CHECK: [[_SINK_SPLIT:.*:]] +; CHECK-NEXT: [[DOT0:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP3:%.*]], %[[TMP14:.*]] ] +; CHECK-NEXT: [[DOTSINK:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP22:%.*]], %[[TMP14]] ] +; CHECK-NEXT: store i32 [[DOTSINK]], ptr @cchPhraseLength, align 4, !tbaa [[INT_TBAA1:![0-9]+]] +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: [[DOT1:%.*]] = phi ptr [ [[DOT0]], %[[DOTSINK_SPLIT]] ], [ [[TMP3]], %[[TMP6:.*]] ] ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, ptr [[DOT1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOT1]], align 1, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOT1]], align 1, !tbaa [[CHAR_TBAA5:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[DOTPREHEADER_PREHEADER:%.*]], label [[TMP6]] -; CHECK: .preheader.preheader: -; CHECK-NEXT: br label [[DOTPREHEADER:%.*]] -; CHECK: 6: +; CHECK-NEXT: br i1 [[TMP5]], label %[[DOTPREHEADER_PREHEADER:.*]], label 
%[[TMP6]] +; CHECK: [[_PREHEADER_PREHEADER:.*:]] +; CHECK-NEXT: br [[DOTPREHEADER:label %.*]] +; CHECK: [[TMP6]]: ; CHECK-NEXT: [[TMP7:%.*]] = tail call ptr @__ctype_b_loc() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA6:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[ANYPTR_TBAA6:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = sext i8 [[TMP4]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP10]], align 2, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP10]], align 2, !tbaa [[SHORT_TBAA8:![0-9]+]] ; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 1024 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i16 [[TMP12]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label [[TMP2]], label [[TMP14]] -; CHECK: 14: +; CHECK-NEXT: br i1 [[TMP13]], label %[[BB2]], label %[[TMP14]] +; CHECK: [[TMP14]]: ; CHECK-NEXT: [[TMP15:%.*]] = sext i8 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = tail call i32 @tolower(i32 [[TMP15]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], -97 ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[TMP18]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 16, !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 16, !tbaa [[INT_TBAA10:![0-9]+]] ; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], 1 -; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 16, !tbaa [[TBAA10]] +; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 16, !tbaa [[INT_TBAA10]] ; CHECK-NEXT: [[TMP22]] = add nsw i32 [[DOTSINK]], 1 -; CHECK-NEXT: br label [[DOTSINK_SPLIT]] -; CHECK: .preheader: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[DOTPREHEADER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP57:%.*]] ] -; CHECK-NEXT: 
[[DOT04961:%.*]] = phi i32 [ [[DOT2:%.*]], [[TMP57]] ], [ 0, [[DOTPREHEADER_PREHEADER]] ] -; CHECK-NEXT: [[DOT05160:%.*]] = phi i32 [ [[DOT253:%.*]], [[TMP57]] ], [ 0, [[DOTPREHEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[DOTSINK_SPLIT]] +; CHECK: [[_PREHEADER:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[DOTPREHEADER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[TMP57:.*]] ] +; CHECK-NEXT: [[DOT04961:%.*]] = phi i32 [ [[DOT2:%.*]], %[[TMP57]] ], [ 0, %[[DOTPREHEADER_PREHEADER]] ] +; CHECK-NEXT: [[DOT05160:%.*]] = phi i32 [ [[DOT253:%.*]], %[[TMP57]] ], [ 0, %[[DOTPREHEADER_PREHEADER]] ] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[TBAA10]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[INT_TBAA10]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP24]], 0 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [26 x i32], ptr @auGlobalFrequency, i64 0, i64 [[INDVARS_IV]] -; CHECK-NEXT: br i1 [[TMP25]], label [[TMP27:%.*]], label [[TMP28:%.*]] -; CHECK: 27: -; CHECK-NEXT: store i32 -1, ptr [[TMP26]], align 4, !tbaa [[TBAA1]] -; CHECK-NEXT: br label [[TMP57]] -; CHECK: 28: -; CHECK-NEXT: store i32 0, ptr [[TMP26]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: br i1 [[TMP25]], label %[[TMP27:.*]], label %[[TMP28:.*]] +; CHECK: [[TMP27]]: +; CHECK-NEXT: store i32 -1, ptr [[TMP26]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: br label %[[TMP57]] +; CHECK: [[TMP28]]: +; CHECK-NEXT: store i32 0, ptr [[TMP26]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[TMP24]] to i64 -; CHECK-NEXT: br i1 false, label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]] -; CHECK: .lr.ph.preheader: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: [[DOT04658:%.*]] = phi i64 [ [[TMP31:%.*]], [[DOTLR_PH]] ], [ 1, [[DOTLR_PH_PREHEADER]] ] -; CHECK-NEXT: 
[[DOT04857:%.*]] = phi i32 [ [[TMP30:%.*]], [[DOTLR_PH]] ], [ 1, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br i1 false, label %[[DOT_CRIT_EDGE:.*]], label %[[DOTLR_PH_PREHEADER:.*]] +; CHECK: [[_LR_PH_PREHEADER:.*:]] +; CHECK-NEXT: br label %[[DOTLR_PH:.*]] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[DOT04658:%.*]] = phi i64 [ [[TMP31:%.*]], %[[DOTLR_PH]] ], [ 1, %[[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: [[DOT04857:%.*]] = phi i32 [ [[TMP30:%.*]], %[[DOTLR_PH]] ], [ 1, %[[DOTLR_PH_PREHEADER]] ] ; CHECK-NEXT: [[TMP30]] = add nuw nsw i32 [[DOT04857]], 1 ; CHECK-NEXT: [[TMP31]] = shl i64 [[DOT04658]], 1 ; CHECK-NEXT: [[TMP32:%.*]] = icmp ult i64 [[TMP29]], [[TMP31]] -; CHECK-NEXT: br i1 [[TMP32]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[DOTLR_PH]] -; CHECK: ._crit_edge.loopexit: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] -; CHECK: ._crit_edge: -; CHECK-NEXT: [[DOT048_LCSSA:%.*]] = phi i32 [ poison, [[TMP28]] ], [ [[TMP30]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] -; CHECK-NEXT: [[DOT046_LCSSA:%.*]] = phi i64 [ poison, [[TMP28]] ], [ [[TMP31]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: br i1 [[TMP32]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]], label %[[DOTLR_PH]] +; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]] +; CHECK-NEXT: br label %[[DOT_CRIT_EDGE]] +; CHECK: [[__CRIT_EDGE:.*:]] +; CHECK-NEXT: [[DOT048_LCSSA:%.*]] = phi i32 [ poison, %[[TMP28]] ], [ [[TMP30]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: [[DOT046_LCSSA:%.*]] = phi i64 [ poison, %[[TMP28]] ], [ [[TMP31]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ] ; CHECK-NEXT: [[TMP33:%.*]] = add nsw i32 [[DOT048_LCSSA]], [[DOT04961]] ; CHECK-NEXT: [[TMP34:%.*]] = icmp ugt i32 [[TMP33]], 64 -; CHECK-NEXT: br i1 [[TMP34]], label [[TMP35:%.*]], label [[TMP39:%.*]] -; CHECK: 35: +; CHECK-NEXT: br i1 [[TMP34]], label %[[TMP35:.*]], label %[[TMP39:.*]] +; CHECK: [[TMP35]]: ; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[DOT05160]], 1 ; CHECK-NEXT: [[TMP37:%.*]] = icmp ugt i32 [[TMP36]], 1 -; CHECK-NEXT: br i1 [[TMP37]], label [[TMP38:%.*]], label [[TMP39]] -; 
CHECK: 38: +; CHECK-NEXT: br i1 [[TMP37]], label %[[TMP38:.*]], label %[[TMP39]] +; CHECK: [[TMP38]]: ; CHECK-NEXT: tail call void @Fatal(ptr @.str.7, i32 0) -; CHECK-NEXT: br label [[TMP39]] -; CHECK: 39: -; CHECK-NEXT: [[DOT152:%.*]] = phi i32 [ [[DOT05160]], [[DOT_CRIT_EDGE]] ], [ [[TMP36]], [[TMP38]] ], [ [[TMP36]], [[TMP35]] ] -; CHECK-NEXT: [[DOT150:%.*]] = phi i32 [ [[DOT04961]], [[DOT_CRIT_EDGE]] ], [ 0, [[TMP38]] ], [ 0, [[TMP35]] ] +; CHECK-NEXT: br label %[[TMP39]] +; CHECK: [[TMP39]]: +; CHECK-NEXT: [[DOT152:%.*]] = phi i32 [ [[DOT05160]], %[[DOT_CRIT_EDGE]] ], [ [[TMP36]], %[[TMP38]] ], [ [[TMP36]], %[[TMP35]] ] +; CHECK-NEXT: [[DOT150:%.*]] = phi i32 [ [[DOT04961]], %[[DOT_CRIT_EDGE]] ], [ 0, %[[TMP38]] ], [ 0, %[[TMP35]] ] ; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[DOT046_LCSSA]], 4294967295 ; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[TMP40]] to i32 ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 2 -; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP42]], align 8, !tbaa [[TBAA12:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP42]], align 8, !tbaa [[INT_TBAA12:![0-9]+]] ; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[DOT150]] to i64 ; CHECK-NEXT: [[DOT046_:%.*]] = shl i64 [[DOT046_LCSSA]], [[TMP43]] ; CHECK-NEXT: [[TMP44:%.*]] = zext i32 [[DOT152]] to i64 ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x i64], ptr @aqMainSign, i64 0, i64 [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP45]], align 8, !tbaa [[TBAA13:![0-9]+]] +; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP45]], align 8, !tbaa [[LONG_TBAA13:![0-9]+]] ; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP46]], [[DOT046_]] -; CHECK-NEXT: store i64 [[TMP47]], ptr [[TMP45]], align 8, !tbaa [[TBAA13]] -; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP23]], align 16, !tbaa [[TBAA10]] +; CHECK-NEXT: store i64 [[TMP47]], ptr [[TMP45]], align 8, !tbaa [[LONG_TBAA13]] +; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP23]], 
align 16, !tbaa [[INT_TBAA10]] ; CHECK-NEXT: [[TMP49:%.*]] = zext i32 [[TMP48]] to i64 ; CHECK-NEXT: [[TMP50:%.*]] = shl i64 [[TMP49]], [[TMP43]] ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i64], ptr @aqMainMask, i64 0, i64 [[TMP44]] -; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP51]], align 8, !tbaa [[TBAA13]] +; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP51]], align 8, !tbaa [[LONG_TBAA13]] ; CHECK-NEXT: [[TMP53:%.*]] = or i64 [[TMP50]], [[TMP52]] -; CHECK-NEXT: store i64 [[TMP53]], ptr [[TMP51]], align 8, !tbaa [[TBAA13]] +; CHECK-NEXT: store i64 [[TMP53]], ptr [[TMP51]], align 8, !tbaa [[LONG_TBAA13]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 1 -; CHECK-NEXT: store i32 [[DOT150]], ptr [[TMP54]], align 4, !tbaa [[TBAA15:![0-9]+]] +; CHECK-NEXT: store i32 [[DOT150]], ptr [[TMP54]], align 4, !tbaa [[INT_TBAA15:![0-9]+]] ; CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [26 x %struct.Letter], ptr @alPhrase, i64 0, i64 [[INDVARS_IV]], i32 3 -; CHECK-NEXT: store i32 [[DOT152]], ptr [[TMP55]], align 4, !tbaa [[TBAA16:![0-9]+]] +; CHECK-NEXT: store i32 [[DOT152]], ptr [[TMP55]], align 4, !tbaa [[INT_TBAA16:![0-9]+]] ; CHECK-NEXT: [[TMP56:%.*]] = add nsw i32 [[DOT150]], [[DOT048_LCSSA]] -; CHECK-NEXT: br label [[TMP57]] -; CHECK: 57: -; CHECK-NEXT: [[DOT253]] = phi i32 [ [[DOT05160]], [[TMP27]] ], [ [[DOT152]], [[TMP39]] ] -; CHECK-NEXT: [[DOT2]] = phi i32 [ [[DOT04961]], [[TMP27]] ], [ [[TMP56]], [[TMP39]] ] +; CHECK-NEXT: br label %[[TMP57]] +; CHECK: [[TMP57]]: +; CHECK-NEXT: [[DOT253]] = phi i32 [ [[DOT05160]], %[[TMP27]] ], [ [[DOT152]], %[[TMP39]] ] +; CHECK-NEXT: [[DOT2]] = phi i32 [ [[DOT04961]], %[[TMP27]] ], [ [[TMP56]], %[[TMP39]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 26 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOTPREHEADER]], label [[TMP58:%.*]] -; CHECK: 
58: +; CHECK-NEXT: br i1 [[EXITCOND]], [[DOTPREHEADER]], label %[[BB58:.*]] +; CHECK: [[BB58]]: ; CHECK-NEXT: ret void ; tail call void @llvm.memset.p0.i64(ptr align 16 @alPhrase, i8 0, i64 416, i1 false) @@ -309,20 +309,20 @@ attributes #5 = { nounwind readonly } !15 = !{!11, !2, i64 4} !16 = !{!11, !2, i64 12} ;. -; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} -; CHECK: [[TBAA5]] = !{[[META3]], [[META3]], i64 0} -; CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +; CHECK: [[CHAR_TBAA5]] = !{[[META3]], [[META3]], i64 0} +; CHECK: [[ANYPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} ; CHECK: [[META7]] = !{!"any pointer", [[META3]], i64 0} -; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; CHECK: [[SHORT_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} ; CHECK: [[META9]] = !{!"short", [[META3]], i64 0} -; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META2]], i64 0} +; CHECK: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META11]] = !{!"", [[META2]], i64 0, [[META2]], i64 4, [[META2]], i64 8, [[META2]], i64 12} -; CHECK: [[TBAA12]] = !{[[META11]], [[META2]], i64 8} -; CHECK: [[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +; CHECK: [[INT_TBAA12]] = !{[[META11]], [[META2]], i64 8} +; CHECK: [[LONG_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} ; CHECK: [[META14]] = !{!"long", [[META3]], i64 0} -; CHECK: [[TBAA15]] = !{[[META11]], [[META2]], i64 4} -; CHECK: [[TBAA16]] = !{[[META11]], [[META2]], i64 12} +; CHECK: [[INT_TBAA15]] = !{[[META11]], [[META2]], i64 4} +; CHECK: [[INT_TBAA16]] = !{[[META11]], [[META2]], i64 12} ;. 
diff --git a/llvm/test/Transforms/NewGVN/pr31501.ll b/llvm/test/Transforms/NewGVN/pr31501.ll index 18bfcd1b9ca09..353c693f2a29b 100644 --- a/llvm/test/Transforms/NewGVN/pr31501.ll +++ b/llvm/test/Transforms/NewGVN/pr31501.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=newgvn -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -50,32 +50,33 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; Function Attrs: norecurse nounwind ssp uwtable define weak_odr hidden ptr @quux(ptr %arg, ptr %arg1) local_unnamed_addr #0 align 2 { -; CHECK-LABEL: @quux( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_BARNEY:%.*]], ptr [[ARG:%.*]], i64 0, i32 3, i32 0, i32 0, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !tbaa [[TBAA2:![0-9]+]] +; CHECK-LABEL: define weak_odr hidden ptr @quux( +; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_BARNEY:%.*]], ptr [[ARG]], i64 0, i32 3, i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !tbaa [[ANYPTR_TBAA2:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_BARNEY]], ptr [[ARG]], i64 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA7:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[ANYPTR_TBAA7:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq ptr [[TMP3]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[BB21:%.*]], label [[BB8:%.*]] -; CHECK: bb8: -; CHECK-NEXT: br label [[BB11:%.*]] -; CHECK: bb9: +; CHECK-NEXT: br i1 [[TMP7]], label %[[BB21:.*]], label %[[BB8:.*]] +; CHECK: [[BB8]]: +; CHECK-NEXT: br label 
%[[BB11:.*]] +; CHECK: [[BB9:.*]]: ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP18:%.*]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP10]], label [[BB19:%.*]], label [[BB11]] -; CHECK: bb11: -; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP17:%.*]], [[BB9:%.*]] ], [ undef, [[BB8]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[TMP18]], [[BB9]] ], [ [[TMP3]], [[BB8]] ] -; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8, !tbaa [[TBAA8:![0-9]+]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq ptr [[TMP15]], [[ARG1:%.*]] +; CHECK-NEXT: br i1 [[TMP10]], label %[[BB19:.*]], label %[[BB11]] +; CHECK: [[BB11]]: +; CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP17:%.*]], %[[BB9]] ], [ undef, %[[BB8]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[TMP18]], %[[BB9]] ], [ [[TMP3]], %[[BB8]] ] +; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq ptr [[TMP15]], [[ARG1]] ; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], ptr [[TMP13]], ptr [[TMP12]] ; CHECK-NEXT: [[TMP18]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP13]], i64 1 -; CHECK-NEXT: br i1 [[TMP16]], label [[BB19]], label [[BB9]] -; CHECK: bb19: -; CHECK-NEXT: [[TMP20:%.*]] = phi ptr [ null, [[BB9]] ], [ [[TMP17]], [[BB11]] ] -; CHECK-NEXT: br label [[BB21]] -; CHECK: bb21: -; CHECK-NEXT: [[TMP22:%.*]] = phi ptr [ null, [[BB:%.*]] ], [ [[TMP20]], [[BB19]] ] +; CHECK-NEXT: br i1 [[TMP16]], label %[[BB19]], label %[[BB9]] +; CHECK: [[BB19]]: +; CHECK-NEXT: [[TMP20:%.*]] = phi ptr [ null, %[[BB9]] ], [ [[TMP17]], %[[BB11]] ] +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: [[TMP22:%.*]] = phi ptr [ null, %[[BB]] ], [ [[TMP20]], %[[BB19]] ] ; CHECK-NEXT: ret ptr [[TMP22]] ; bb: @@ -128,3 +129,15 @@ attributes #0 = { norecurse nounwind ssp uwtable "correctly-rounded-divide-sqrt- !9 = !{!"_ZTSN4llvm9RecordValE", !4, i64 0, !10, i64 8, !4, i64 16} !10 = 
!{!"_ZTSN4llvm14PointerIntPairIPNS_5RecTyELj1EbNS_21PointerLikeTypeTraitsIS2_EENS_18PointerIntPairInfoIS2_Lj1ES4_EEEE", !11, i64 0} !11 = !{!"long", !5, i64 0} +;. +; CHECK: [[ANYPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"_ZTSN4llvm15SmallVectorBaseE", [[META4]], i64 0, [[META4]], i64 8, [[META4]], i64 16} +; CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +; CHECK: [[META6]] = !{!"Simple C++ TBAA"} +; CHECK: [[ANYPTR_TBAA7]] = !{[[META3]], [[META4]], i64 8} +; CHECK: [[ANYPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META9]] = !{!"_ZTSN4llvm9RecordValE", [[META4]], i64 0, [[META10:![0-9]+]], i64 8, [[META4]], i64 16} +; CHECK: [[META10]] = !{!"_ZTSN4llvm14PointerIntPairIPNS_5RecTyELj1EbNS_21PointerLikeTypeTraitsIS2_EENS_18PointerIntPairInfoIS2_Lj1ES4_EEEE", [[META11:![0-9]+]], i64 0} +; CHECK: [[META11]] = !{!"long", [[META5]], i64 0} +;. diff --git a/llvm/test/Transforms/NewGVN/pr33305.ll b/llvm/test/Transforms/NewGVN/pr33305.ll index 3a19f610defcd..e742f14249c7c 100644 --- a/llvm/test/Transforms/NewGVN/pr33305.ll +++ b/llvm/test/Transforms/NewGVN/pr33305.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=newgvn -S %s | FileCheck %s ; Ensure we do not incorrect do phi of ops source_filename = "/Users/dannyb/sources/llvm-clean/debug-build/pr33305.c" @@ -17,68 +17,69 @@ target triple = "x86_64-apple-macosx10.12.0" ; Function Attrs: nounwind optsize ssp uwtable define i32 @main() local_unnamed_addr #0 { -; CHECK-LABEL: @main( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTPR_I:%.*]] = load i32, ptr @c, align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-LABEL: define i32 @main( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: 
[[DOTPR_I:%.*]] = load i32, ptr @c, align 4, !tbaa [[INT_TBAA3:![0-9]+]] ; CHECK-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[DOTPR_I]], 1 -; CHECK-NEXT: br i1 [[CMP13_I]], label [[FOR_COND1_PREHEADER_LR_PH_I:%.*]], label [[ENTRY_FOR_END9_I_CRIT_EDGE:%.*]] -; CHECK: entry.for.end9.i_crit_edge: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr @h, align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: br label [[FOR_END9_I:%.*]] -; CHECK: for.cond1.preheader.lr.ph.i: -; CHECK-NEXT: [[G_PROMOTED14_I:%.*]] = load i32, ptr @g, align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_I:%.*]] -; CHECK: for.cond1.preheader.i: -; CHECK-NEXT: [[INC816_I:%.*]] = phi i32 [ [[DOTPR_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ [[INC8_I:%.*]], [[FOR_INC7_I:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[G_PROMOTED14_I]], [[FOR_COND1_PREHEADER_LR_PH_I]] ], [ 0, [[FOR_INC7_I]] ] -; CHECK-NEXT: br label [[FOR_BODY3_I:%.*]] -; CHECK: for.body3.i: -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[FOR_COND1_PREHEADER_I]] ], [ true, [[LOR_END_I:%.*]] ] -; CHECK-NEXT: [[INC12_I:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_I]] ], [ [[INC_I:%.*]], [[LOR_END_I]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[FOR_COND1_PREHEADER_I]] ], [ 0, [[LOR_END_I]] ] +; CHECK-NEXT: br i1 [[CMP13_I]], label %[[FOR_COND1_PREHEADER_LR_PH_I:.*]], label %[[ENTRY_FOR_END9_I_CRIT_EDGE:.*]] +; CHECK: [[ENTRY_FOR_END9_I_CRIT_EDGE]]: +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr @h, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: br label %[[FOR_END9_I:.*]] +; CHECK: [[FOR_COND1_PREHEADER_LR_PH_I]]: +; CHECK-NEXT: [[G_PROMOTED14_I:%.*]] = load i32, ptr @g, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: br label %[[FOR_COND1_PREHEADER_I:.*]] +; CHECK: [[FOR_COND1_PREHEADER_I]]: +; CHECK-NEXT: [[INC816_I:%.*]] = phi i32 [ [[DOTPR_I]], %[[FOR_COND1_PREHEADER_LR_PH_I]] ], [ [[INC8_I:%.*]], %[[FOR_INC7_I:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[G_PROMOTED14_I]], %[[FOR_COND1_PREHEADER_LR_PH_I]] ], [ 0, %[[FOR_INC7_I]] ] +; 
CHECK-NEXT: br label %[[FOR_BODY3_I:.*]] +; CHECK: [[FOR_BODY3_I]]: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FOR_COND1_PREHEADER_I]] ], [ true, %[[LOR_END_I:.*]] ] +; CHECK-NEXT: [[INC12_I:%.*]] = phi i32 [ 0, %[[FOR_COND1_PREHEADER_I]] ], [ [[INC_I:%.*]], %[[LOR_END_I]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], %[[FOR_COND1_PREHEADER_I]] ], [ 0, %[[LOR_END_I]] ] ; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK-NEXT: [[OR_COND_I:%.*]] = and i1 [[TMP1]], [[TOBOOL_I]] -; CHECK-NEXT: br i1 [[OR_COND_I]], label [[LOR_END_I]], label [[LOR_RHS_I:%.*]] -; CHECK: lor.rhs.i: +; CHECK-NEXT: br i1 [[OR_COND_I]], label %[[LOR_END_I]], label %[[LOR_RHS_I:.*]] +; CHECK: [[LOR_RHS_I]]: ; CHECK-NEXT: [[LNOT_I:%.*]] = xor i1 [[TOBOOL_I]], true ; CHECK-NEXT: [[LNOT_EXT_I:%.*]] = zext i1 [[LNOT_I]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @e, align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @e, align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[TMP3]], [[LNOT_EXT_I]] -; CHECK-NEXT: store i32 [[XOR_I]], ptr @e, align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: br label [[LOR_END_I]] -; CHECK: lor.end.i: +; CHECK-NEXT: store i32 [[XOR_I]], ptr @e, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: br label %[[LOR_END_I]] +; CHECK: [[LOR_END_I]]: ; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[INC12_I]], 1 ; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i32 [[INC_I]], 2 -; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[FOR_INC7_I]], label [[FOR_BODY3_I]] -; CHECK: for.inc7.i: +; CHECK-NEXT: br i1 [[EXITCOND_I]], label %[[FOR_INC7_I]], label %[[FOR_BODY3_I]] +; CHECK: [[FOR_INC7_I]]: ; CHECK-NEXT: [[INC8_I]] = add nsw i32 [[INC816_I]], 1 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[INC816_I]], 0 -; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_PREHEADER_I]], label [[FOR_COND_FOR_END9_CRIT_EDGE_I:%.*]] -; CHECK: for.cond.for.end9_crit_edge.i: -; CHECK-NEXT: store i32 0, ptr @g, align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 2, ptr @h, 
align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 [[INC8_I]], ptr @c, align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: br label [[FOR_END9_I]] -; CHECK: for.end9.i: -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[DOTPRE]], [[ENTRY_FOR_END9_I_CRIT_EDGE]] ], [ 2, [[FOR_COND_FOR_END9_CRIT_EDGE_I]] ] -; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr @b, align 8, !tbaa [[TBAA7:![0-9]+]] -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @e, align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_COND1_PREHEADER_I]], label %[[FOR_COND_FOR_END9_CRIT_EDGE_I:.*]] +; CHECK: [[FOR_COND_FOR_END9_CRIT_EDGE_I]]: +; CHECK-NEXT: store i32 0, ptr @g, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: store i32 2, ptr @h, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: store i32 [[INC8_I]], ptr @c, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: br label %[[FOR_END9_I]] +; CHECK: [[FOR_END9_I]]: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[DOTPRE]], %[[ENTRY_FOR_END9_I_CRIT_EDGE]] ], [ 2, %[[FOR_COND_FOR_END9_CRIT_EDGE_I]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr @b, align 8, !tbaa [[ANYPTR_TBAA7:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @e, align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[CMP10_I:%.*]] = icmp slt i32 [[TMP6]], -1 -; CHECK-NEXT: br i1 [[CMP10_I]], label [[IF_THEN_I:%.*]], label [[FN1_EXIT:%.*]] -; CHECK: if.then.i: -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @f, align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: br label [[FN1_EXIT]] -; CHECK: fn1.exit: -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @a, align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: br i1 [[CMP10_I]], label %[[IF_THEN_I:.*]], label %[[FN1_EXIT:.*]] +; CHECK: [[IF_THEN_I]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @f, align 4, !tbaa [[INT_TBAA3]] +; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4, !tbaa 
[[INT_TBAA3]] +; CHECK-NEXT: br label %[[FN1_EXIT]] +; CHECK: [[FN1_EXIT]]: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @a, align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: ; CHECK-NEXT: [[PUTS2:%.*]] = tail call i32 @puts(ptr @str.2) ; CHECK-NEXT: tail call void @abort() #[[ATTR3:[0-9]+]] ; CHECK-NEXT: unreachable -; CHECK: if.end: +; CHECK: [[IF_END]]: ; CHECK-NEXT: [[PUTS:%.*]] = tail call i32 @puts(ptr @str) ; CHECK-NEXT: ret i32 0 ; @@ -183,3 +184,11 @@ attributes #3 = { noreturn nounwind optsize } !6 = !{!"Simple C/C++ TBAA"} !7 = !{!8, !8, i64 0} !8 = !{!"any pointer", !5, i64 0} +;. +; CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[ANYPTR_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[META8]] = !{!"any pointer", [[META5]], i64 0} +;. 
diff --git a/llvm/test/Transforms/NewGVN/pr33367.ll b/llvm/test/Transforms/NewGVN/pr33367.ll index 597caa2b34ef2..428a053bcc894 100644 --- a/llvm/test/Transforms/NewGVN/pr33367.ll +++ b/llvm/test/Transforms/NewGVN/pr33367.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -aa-pipeline=basic-aa -passes=newgvn -S %s | FileCheck %s ; Verify that we don't accidentally delete intrinsics that aren't SSA copies %DS_struct = type { [32 x ptr], i8, [32 x i16] } @@ -7,47 +7,48 @@ declare i64 @llvm.x86.bmi.bextr.64(i64, i64) #3 define %MNR_struct @f000316011717_2(ptr %pDS, ptr %pCG) #2 { -; CHECK-LABEL: @f000316011717_2( -; CHECK-NEXT: Entry: +; CHECK-LABEL: define %MNR_struct @f000316011717_2( +; CHECK-SAME: ptr [[PDS:%.*]], ptr [[PCG:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[RESTART:%.*]] = alloca [[MNR_STRUCT:%.*]], align 8 -; CHECK-NEXT: [[PCARRY:%.*]] = getelementptr [[DS_STRUCT:%.*]], ptr [[PDS:%.*]], i32 0, i32 1 -; CHECK-NEXT: [[BASE:%.*]] = load ptr, ptr [[PDS]], align 8, !tbaa [[TBAA14:![0-9]+]] +; CHECK-NEXT: [[PCARRY:%.*]] = getelementptr [[DS_STRUCT:%.*]], ptr [[PDS]], i32 0, i32 1 +; CHECK-NEXT: [[BASE:%.*]] = load ptr, ptr [[PDS]], align 8, !tbaa [[BREG_TBAA14:![0-9]+]] ; CHECK-NEXT: [[ABSADDR:%.*]] = getelementptr i64, ptr [[BASE]], i64 9 -; CHECK-NEXT: [[EXTARGET:%.*]] = load i64, ptr [[ABSADDR]], align 8, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[EXTARGET:%.*]] = load i64, ptr [[ABSADDR]], align 8, !tbaa [[MEM_TBAA4:![0-9]+]] ; CHECK-NEXT: [[TEMPLATE:%.*]] = icmp eq i64 [[EXTARGET]], 8593987412 -; CHECK-NEXT: br i1 [[TEMPLATE]], label %"BB3.000316011731#1", label [[BB2_000316011731_5:%.*]] +; CHECK-NEXT: br i1 [[TEMPLATE]], label %"BB3.000316011731#1", label %[[BB2_000316011731_5:.*]] ; CHECK: "BB3.000316011731#1": ; CHECK-NEXT: [[PBASE8:%.*]] = getelementptr [32 x ptr], ptr 
[[PDS]], i64 0, i64 29 -; CHECK-NEXT: [[BASE9:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[TBAA14]] +; CHECK-NEXT: [[BASE9:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[BREG_TBAA14]] ; CHECK-NEXT: [[ABSADDR1:%.*]] = getelementptr i64, ptr [[BASE9]], i64 7 -; CHECK-NEXT: [[RMEM:%.*]] = load i64, ptr [[ABSADDR1]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[RMEM:%.*]] = load i64, ptr [[ABSADDR1]], align 8, !tbaa [[MEM_TBAA4]] ; CHECK-NEXT: [[PWT:%.*]] = getelementptr [[DS_STRUCT]], ptr [[PDS]], i32 0, i32 2 ; CHECK-NEXT: [[PWTE:%.*]] = getelementptr [32 x i16], ptr [[PWT]], i64 0, i64 8593987412 -; CHECK-NEXT: [[SHIFTS:%.*]] = load i16, ptr [[PWTE]], align 2, !tbaa [[TBAA18:![0-9]+]], !invariant.load [[META20:![0-9]+]] +; CHECK-NEXT: [[SHIFTS:%.*]] = load i16, ptr [[PWTE]], align 2, !tbaa [[CONST_TBAA18:![0-9]+]], !invariant.load [[META20:![0-9]+]] ; CHECK-NEXT: [[SLOWJ:%.*]] = icmp eq i16 [[SHIFTS]], 0 -; CHECK-NEXT: br i1 [[SLOWJ]], label [[BB2_000316011731_5]], label %"BB3.000316011731#1.1" -; CHECK: BB2.000316011731.5: +; CHECK-NEXT: br i1 [[SLOWJ]], label %[[BB2_000316011731_5]], label %"BB3.000316011731#1.1" +; CHECK: [[BB2_000316011731_5]]: ; CHECK-NEXT: [[EXTARGET1:%.*]] = and i64 [[EXTARGET]], 137438953471 -; CHECK-NEXT: switch i64 [[EXTARGET1]], label [[EXIT:%.*]] [ +; CHECK-NEXT: switch i64 [[EXTARGET1]], label %[[EXIT:.*]] [ ; CHECK-NEXT: ] ; CHECK: "BB3.000316011731#1.1": ; CHECK-NEXT: [[SHIFTS1:%.*]] = zext i16 [[SHIFTS]] to i64 ; CHECK-NEXT: [[VAL:%.*]] = call i64 @llvm.x86.bmi.bextr.64(i64 [[RMEM]], i64 [[SHIFTS1]]) -; CHECK-NEXT: [[PREG:%.*]] = getelementptr [64 x i64], ptr [[PCG:%.*]], i64 0, i64 12 -; CHECK-NEXT: store i64 [[VAL]], ptr [[PREG]], align 32, !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: [[PREG:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 12 +; CHECK-NEXT: store i64 [[VAL]], ptr [[PREG]], align 32, !tbaa [[A0_TBAA10:![0-9]+]] ; CHECK-NEXT: [[PREG2:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 14 -; 
CHECK-NEXT: [[REG:%.*]] = load i64, ptr [[PREG2]], align 16, !tbaa [[TBAA12:![0-9]+]] -; CHECK-NEXT: [[BASE2:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[TBAA14]] +; CHECK-NEXT: [[REG:%.*]] = load i64, ptr [[PREG2]], align 16, !tbaa [[A2_TBAA12:![0-9]+]] +; CHECK-NEXT: [[BASE2:%.*]] = load ptr, ptr [[PBASE8]], align 8, !tbaa [[BREG_TBAA14]] ; CHECK-NEXT: [[ABSADDR2:%.*]] = getelementptr i64, ptr [[BASE2]], i64 [[REG]] -; CHECK-NEXT: [[RMEM2:%.*]] = load i64, ptr [[ABSADDR2]], align 8, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: [[RMEM2:%.*]] = load i64, ptr [[ABSADDR2]], align 8, !tbaa [[MEM_TBAA4]] ; CHECK-NEXT: [[PREG7:%.*]] = getelementptr [64 x i64], ptr [[PCG]], i64 0, i64 9 -; CHECK-NEXT: store i64 [[RMEM2]], ptr [[PREG7]], align 8, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: store i64 [[RMEM2]], ptr [[PREG7]], align 8, !tbaa [[X9_TBAA8:![0-9]+]] ; CHECK-NEXT: [[ADD2C279:%.*]] = add i64 [[RMEM2]], [[VAL]] ; CHECK-NEXT: [[CCHK:%.*]] = icmp sge i64 [[ADD2C279]], 0 ; CHECK-NEXT: [[CFL:%.*]] = zext i1 [[CCHK]] to i8 -; CHECK-NEXT: store i8 [[CFL]], ptr [[PCARRY]], align 1, !tbaa [[TBAA16:![0-9]+]] -; CHECK-NEXT: br label [[EXIT]] -; CHECK: Exit: +; CHECK-NEXT: store i8 [[CFL]], ptr [[PCARRY]], align 1, !tbaa [[CARRY_TBAA16:![0-9]+]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RESTART378:%.*]] = load [[MNR_STRUCT]], ptr [[RESTART]], align 8 ; CHECK-NEXT: ret [[MNR_STRUCT]] [[RESTART378]] ; @@ -129,3 +130,24 @@ attributes #3 = { nounwind readnone } !175 = !{!176, !176, i64 0, i32 1} !176 = !{!"const", !3} !181 = !{} +;. 
+; CHECK: [[META0:![0-9]+]] = !{!"tbaa2200"} +; CHECK: [[META2:![0-9]+]] = !{!"data", [[META0]]} +; CHECK: [[META3:![0-9]+]] = !{!"ctrl", [[META0]]} +; CHECK: [[MEM_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"mem", [[META2]]} +; CHECK: [[META7:![0-9]+]] = !{!"grs", [[META2]]} +; CHECK: [[X9_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; CHECK: [[META9]] = !{!"X9", [[META7]]} +; CHECK: [[A0_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} +; CHECK: [[META11]] = !{!"A0", [[META7]]} +; CHECK: [[A2_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; CHECK: [[META13]] = !{!"A2", [[META7]]} +; CHECK: [[BREG_TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} +; CHECK: [[META15]] = !{!"breg", [[META3]]} +; CHECK: [[CARRY_TBAA16]] = !{[[META17:![0-9]+]], [[META17]], i64 0} +; CHECK: [[META17]] = !{!"carry", [[META3]]} +; CHECK: [[CONST_TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0, i32 1} +; CHECK: [[META19]] = !{!"const", [[META3]]} +; CHECK: [[META20]] = !{} +;. 
diff --git a/llvm/test/Transforms/NewGVN/pr34452.ll b/llvm/test/Transforms/NewGVN/pr34452.ll index 9e65349a1b47b..48bdd88e9591a 100644 --- a/llvm/test/Transforms/NewGVN/pr34452.ll +++ b/llvm/test/Transforms/NewGVN/pr34452.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s ;; Ensure we don't crash when simplifying aggregate value expressions source_filename = "bugpoint-output-09f7a24.bc" @@ -7,17 +7,18 @@ source_filename = "bugpoint-output-09f7a24.bc" ; Function Attrs: nounwind uwtable define void @sgrep() local_unnamed_addr #0 { -; CHECK-LABEL: @sgrep( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @WHOLELINE, align 4, !tbaa [[TBAA1:![0-9]+]] +; CHECK-LABEL: define void @sgrep( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @WHOLELINE, align 4, !tbaa [[INT_TBAA1:![0-9]+]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2048, i32 2047 -; CHECK-NEXT: br label [[WHILE_BODY_US:%.*]] -; CHECK: while.body.us: -; CHECK-NEXT: [[START_1230_US:%.*]] = phi i32 [ [[DOT]], [[ENTRY:%.*]] ], [ 0, [[WHILE_BODY_US]] ] +; CHECK-NEXT: br label %[[WHILE_BODY_US:.*]] +; CHECK: [[WHILE_BODY_US]]: +; CHECK-NEXT: [[START_1230_US:%.*]] = phi i32 [ [[DOT]], %[[ENTRY]] ], [ 0, %[[WHILE_BODY_US]] ] ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[START_1230_US]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 0, i64 [[TMP1]]) -; CHECK-NEXT: br label [[WHILE_BODY_US]] +; CHECK-NEXT: br label %[[WHILE_BODY_US]] ; entry: %0 = load i32, ptr @WHOLELINE, align 4, !tbaa !1 @@ -47,3 +48,9 @@ attributes #1 = { nounwind readnone speculatable } !2 = !{!"int", !3, i64 0} !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C/C++ 
TBAA"} +;. +; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +;. diff --git a/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll b/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll index a63ca131b5c0d..c1e52b89ea620 100644 --- a/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll +++ b/llvm/test/Transforms/NewGVN/preserve-metadata-for-predicate-replacements.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=newgvn %s -S | FileCheck %s declare void @use(i32) @@ -7,25 +7,26 @@ declare void @use(i32) ; PredicateInfo are replaced. define i32 @test(ptr %p1, ptr %p2, i1 %c) { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[P1:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[P1]], align 8, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[LV]], 1 -; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if.false: -; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT]], label [[FOR_CHECK:%.*]] -; CHECK: for.check: +; CHECK-NEXT: br i1 [[CMP_1]], label %[[EXIT:.*]], label %[[IF_FALSE:.*]] +; CHECK: [[IF_FALSE]]: +; CHECK-NEXT: br i1 [[C]], label %[[EXIT]], label %[[FOR_CHECK:.*]] +; CHECK: [[FOR_CHECK]]: ; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[LV]], 0 -; CHECK-NEXT: br i1 [[CMP_2]], label [[FOR_PH:%.*]], label [[EXIT]] -; CHECK: for.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: 
[[IV:%.*]] = phi i32 [ 0, [[FOR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br i1 [[CMP_2]], label %[[FOR_PH:.*]], label %[[EXIT]] +; CHECK: [[FOR_PH]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[FOR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: call void @use(i32 [[IV]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[CMP_3:%.*]] = icmp ne i32 [[IV_NEXT]], [[LV]] -; CHECK-NEXT: br i1 [[CMP_3]], label [[FOR_BODY]], label [[EXIT]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[CMP_3]], label %[[FOR_BODY]], label %[[EXIT]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret i32 [[LV]] ; entry: @@ -59,3 +60,10 @@ exit: ; preds = %for.body, %for.check !2 = !{!"int", !3, i64 0} !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} +; CHECK: [[META1]] = !{!"FULL", [[META2]], i64 0, [[META2]], i64 4, [[META3:![0-9]+]], i64 8} +; CHECK: [[META2]] = !{!"int", [[META3]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/NewGVN/tbaa.ll b/llvm/test/Transforms/NewGVN/tbaa.ll index 20c09aa68726a..a90660349f2f4 100644 --- a/llvm/test/Transforms/NewGVN/tbaa.ll +++ b/llvm/test/Transforms/NewGVN/tbaa.ll @@ -1,10 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s define i32 @test1(ptr %p, ptr %q) { ; CHECK-LABEL: define i32 @test1( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -17,7 +17,7 @@ define i32 @test1(ptr %p, ptr %q) { define i32 @test2(ptr %p, ptr %q) { ; CHECK-LABEL: define i32 @test2( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -30,7 +30,7 @@ define i32 @test2(ptr %p, ptr %q) { define i32 @test3(ptr %p, ptr %q) { ; CHECK-LABEL: define i32 @test3( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[B_TBAA4:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -43,7 +43,7 @@ define i32 @test3(ptr %p, ptr %q) { define i32 @test4(ptr %p, ptr %q) { ; CHECK-LABEL: define i32 @test4( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[A_TBAA6:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: 
ret i32 [[C]] ; @@ -56,7 +56,7 @@ define i32 @test4(ptr %p, ptr %q) { define i32 @test5(ptr %p, ptr %q) { ; CHECK-LABEL: define i32 @test5( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -69,7 +69,7 @@ define i32 @test5(ptr %p, ptr %q) { define i32 @test6(ptr %p, ptr %q) { ; CHECK-LABEL: define i32 @test6( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[C_TBAA0]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -82,7 +82,7 @@ define i32 @test6(ptr %p, ptr %q) { define i32 @test7(ptr %p, ptr %q) { ; CHECK-LABEL: define i32 @test7( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[SCALAR_TYPE_TBAA7:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -129,7 +129,7 @@ define i32 @test10(ptr %p, ptr %q) { ; and not just the common final access type. ; CHECK-LABEL: define i32 @test10( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[INT_TBAA10:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -165,17 +165,17 @@ declare i32 @foo(ptr) readonly !9 = !{!"yet another root"} !10 = !{!"node", !9, i64 1} ;. 
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[C_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"C", [[META2:![0-9]+]]} ; CHECK: [[META2]] = !{!"A", [[META3:![0-9]+]]} ; CHECK: [[META3]] = !{!"tbaa root"} -; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[B_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} ; CHECK: [[META5]] = !{!"B", [[META2]]} -; CHECK: [[TBAA6]] = !{[[META2]], [[META2]], i64 0} -; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[A_TBAA6]] = !{[[META2]], [[META2]], i64 0} +; CHECK: [[SCALAR_TYPE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} ; CHECK: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]} ; CHECK: [[META9]] = !{!"another root"} -; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0} +; CHECK: [[INT_TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0} ; CHECK: [[META11]] = !{!"struct X", [[META12]], i64 0} ; CHECK: [[META12]] = !{!"int", [[META13:![0-9]+]], i64 0} ; CHECK: [[META13]] = !{!"char", [[META3]], i64 0} diff --git a/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll b/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll index d8b28d73f24ee..68f7ee5c64e38 100644 --- a/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll +++ b/llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s %struct.t = type { ptr } @@ -8,10 +8,10 @@ define void @test1(ptr nocapture readonly %p, i32 %v) #0 { ; CHECK-LABEL: define void @test1( ; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], 
align 4, !tbaa [[TBAA5:![0-9]+]] -; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5]] +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0:![0-9]+]] +; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] +; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5]] ; CHECK-NEXT: ret void ; entry: @@ -27,11 +27,11 @@ entry: define void @test2(ptr nocapture readonly %p, i32 %v) #0 { ; CHECK-LABEL: define void @test2( ; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0]] +; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[ANYPTR_TBAA0]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0]] +; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[ANYPTR_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ -47,11 +47,11 @@ entry: define void @test3(ptr nocapture readonly %p, i32 %v) #0 { ; CHECK-LABEL: define void @test3( ; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[TBAA5]] -; CHECK-NEXT: [[TMP1:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[TBAA5]] +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: 
[[TMP0:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[ANYPTR_TBAA0]] +; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5]] +; CHECK-NEXT: [[TMP1:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[ANYPTR_TBAA0]] +; CHECK-NEXT: store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5]] ; CHECK-NEXT: ret void ; entry: @@ -73,11 +73,11 @@ attributes #0 = { norecurse nounwind } !7 = !{!"int", !4, i64 0} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} +; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} ; CHECK: [[META1]] = !{!"", [[META2]], i64 0} ; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} -; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} ; CHECK: [[META6]] = !{!"int", [[META3]], i64 0} ;. 
diff --git a/llvm/test/Transforms/OpenMP/dead_use.ll b/llvm/test/Transforms/OpenMP/dead_use.ll index b3f5194b10fc3..1c4b2c6fe27a6 100644 --- a/llvm/test/Transforms/OpenMP/dead_use.ll +++ b/llvm/test/Transforms/OpenMP/dead_use.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=openmp-opt-cgscc < %s | FileCheck %s %struct.ident_t = type { i32, i32, i32, i32, ptr } @@ -7,8 +7,8 @@ ; Function Attrs: nounwind uwtable define dso_local i32 @b() #0 { -; CHECK-LABEL: define {{[^@]+}}@b -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-LABEL: define dso_local i32 @b( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 @@ -22,8 +22,8 @@ define dso_local i32 @b() #0 { ; Function Attrs: nounwind uwtable define internal i32 @a() #0 { -; CHECK-LABEL: define {{[^@]+}}@a -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-LABEL: define internal i32 @a( +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @b() ; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB0:[0-9]+]], i32 0, ptr @.omp_outlined.) @@ -39,12 +39,12 @@ define internal i32 @a() #0 { ; Function Attrs: norecurse nounwind uwtable define internal void @.omp_outlined.(ptr noalias %0, ptr noalias %1) #1 { -; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-; CHECK-SAME: (ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-LABEL: define internal void @.omp_outlined.( +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = alloca ptr, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8, !tbaa [[TBAA2:![0-9]+]] -; CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8, !tbaa [[TBAA2]] +; CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA2:![0-9]+]] +; CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8, !tbaa [[ANYPTR_TBAA2]] ; CHECK-NEXT: ret void ; %3 = alloca ptr, align 8 @@ -72,3 +72,9 @@ attributes #2 = { nounwind } !5 = !{!"Simple C/C++ TBAA"} !6 = !{!7} !7 = !{i64 2, i64 -1, i64 -1, i1 true} +;. +; CHECK: [[ANYPTR_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +; CHECK: [[META3]] = !{!"any pointer", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll index 1d18e527e1466..ad3955e2b9dd9 100644 --- a/llvm/test/Transforms/OpenMP/global_constructor.ll +++ b/llvm/test/Transforms/OpenMP/global_constructor.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --version 6 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s %struct.ident_t = type { i32, i32, i32, i32, ptr } @@ -74,34 +74,40 @@ attributes #1 = { convergent nounwind } !12 = !{!"double", !13, i64 0} !13 = !{!"omnipotent char", !14, i64 0} !14 = !{!"Simple C++ TBAA"} -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11 -; CHECK-SAME: (ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_85283c04_main_l11( +; CHECK-SAME: ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment, ptr [[DYN]]) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; CHECK: common.ret: +; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; CHECK: [[COMMON_RET]]: ; CHECK-NEXT: ret void -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA9:![0-9]+]] +; CHECK: [[USER_CODE_ENTRY]]: +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 
8, !tbaa [[DOUBLE_TBAA9:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] -; CHECK: region.guarded: -; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA9]] -; CHECK-NEXT: br label [[REGION_BARRIER]] -; CHECK: region.barrier: +; CHECK-NEXT: br i1 [[TMP3]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]] +; CHECK: [[REGION_GUARDED]]: +; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[DOUBLE_TBAA9]] +; CHECK-NEXT: br label %[[REGION_BARRIER]] +; CHECK: [[REGION_BARRIER]]: ; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR1]] ; CHECK-NEXT: tail call void @__kmpc_target_deinit() #[[ATTR1]] -; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK-NEXT: br label %[[COMMON_RET]] ; ; -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define weak ptx_kernel void @__omp_offloading__fd02_85283c04_Device_l6_ctor( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2]] ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]] -; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA9]] +; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[DOUBLE_TBAA9]] ; CHECK-NEXT: ret void ; +;. +; CHECK: [[DOUBLE_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +; CHECK: [[META10]] = !{!"double", [[META11:![0-9]+]], i64 0} +; CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0} +; CHECK: [[META12]] = !{!"Simple C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 0272c41d9d1fc..19d447449dee4 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1 @@ -186,33 +186,33 @@ ; NVPTX-DISABLED2: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4 ;. define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5( +; AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] { ; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; NVPTX-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5( +; NVPTX-SAME: ) #[[ATTR0:[0-9]+]] { ; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel 
void @__omp_offloading_fd02_2044372e_sequential_loop_l5( +; AMDGPU-DISABLED1-SAME: ) #[[ATTR0:[0-9]+]] { ; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5( +; AMDGPU-DISABLED2-SAME: ) #[[ATTR0:[0-9]+]] { ; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5( +; NVPTX-DISABLED1-SAME: ) #[[ATTR0:[0-9]+]] { ; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5( +; NVPTX-DISABLED2-SAME: ) #[[ATTR0:[0-9]+]] { ; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED2-NEXT: ret void ; @@ -221,47 +221,47 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() } define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug( +; AMDGPU-SAME: ) #[[ATTR1:[0-9]+]] { +; 
AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug( +; NVPTX-SAME: ) #[[ATTR1:[0-9]+]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug( +; AMDGPU-DISABLED1-SAME: ) #[[ATTR1:[0-9]+]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -269,71 +269,71 @@ define internal void 
@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp 
eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU-DISABLED1: 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: +; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED1: [[COMMON_RET]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: +; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED2-LABEL: define 
{{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug( +; AMDGPU-DISABLED2-SAME: ) #[[ATTR1:[0-9]+]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: +; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED2: [[COMMON_RET]]: ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: +; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED2-NEXT: br label 
%[[COMMON_RET]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug( +; NVPTX-DISABLED1-SAME: ) #[[ATTR1:[0-9]+]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -341,66 +341,66 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: +; 
NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; NVPTX-DISABLED1: 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: +; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED1: [[COMMON_RET]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: +; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) 
[[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug( +; NVPTX-DISABLED2-SAME: ) #[[ATTR1:[0-9]+]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: +; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED2: [[COMMON_RET]]: ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: +; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], 
ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -424,125 +424,125 @@ user_code.entry: ; preds = %entry } define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-NEXT: [[ENTRY:.*]]: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: +; AMDGPU: [[FOR_BODY]]: ; AMDGPU-NEXT: [[TMP0:%.*]] = 
addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-NEXT: [[ENTRY:.*]]: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: +; NVPTX: [[FOR_BODY]]: ; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-NEXT: 
[[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED1: [[FOR_COND]]: +; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: +; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED1-NEXT: ret void 
-; AMDGPU-DISABLED1: for.body: +; AMDGPU-DISABLED1: [[FOR_BODY]]: ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED2: [[FOR_COND]]: +; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: 
for.cond.cleanup: +; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: +; AMDGPU-DISABLED2: [[FOR_BODY]]: ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED1: [[FOR_COND]]: +; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 
0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: +; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: +; NVPTX-DISABLED1: [[FOR_BODY]]: ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; 
NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED2: [[FOR_COND]]: +; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: +; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: +; NVPTX-DISABLED2: [[FOR_BODY]]: ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; entry: %captured_vars_addrs = alloca ptr, align 8, addrspace(5) @@ -566,39 +566,39 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__1( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__1( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__1( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__1( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) 
{ -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__1( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__1( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -609,9 +609,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__1_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -622,9 +622,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void 
@__omp_outlined__1_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -635,9 +635,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__1_wrapper( +; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -648,9 +648,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__1_wrapper( +; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: 
[[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -661,9 +661,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__1_wrapper( +; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -674,9 +674,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__1_wrapper( +; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -703,47 +703,47 @@ entry: ; Function Attrs: 
alwaysinline convergent norecurse nounwind define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20( +; AMDGPU-SAME: ) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define 
{{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20( +; NVPTX-SAME: ) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void 
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20( +; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -751,71 +751,71 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; 
AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; 
AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: +; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED1: [[COMMON_RET]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: +; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, 
!tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20( +; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: +; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED2: [[COMMON_RET]]: ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: +; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr 
addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20( +; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -823,66 +823,66 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; 
NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: +; NVPTX-DISABLED1: 
[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; 
NVPTX-DISABLED1: common.ret: +; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED1: [[COMMON_RET]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: +; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20( +; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: +; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED2: [[COMMON_RET]]: ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: +; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -906,140 +906,140 @@ user_code.entry: ; preds = %entry } define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__2( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-NEXT: [[ENTRY:.*]]: ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: +; AMDGPU: [[FOR_BODY]]: ; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-NEXT: 
[[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__2( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-NEXT: [[ENTRY:.*]]: ; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]] -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: +; NVPTX: [[FOR_BODY]]: ; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr 
[[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__2( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED1: [[FOR_COND]]: +; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: +; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: for.body: +; AMDGPU-DISABLED1: [[FOR_BODY]]: ; 
AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__2( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED2: [[FOR_COND]]: +; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], 
%[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: for.cond.cleanup: +; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: +; AMDGPU-DISABLED2: [[FOR_BODY]]: ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__2( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) 
[[X_H2S]]) #[[ATTR7]] -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED1: [[FOR_COND]]: +; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: +; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: +; NVPTX-DISABLED1: [[FOR_BODY]]: ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__2( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; 
NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]] -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED2: [[FOR_COND]]: +; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: +; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: +; NVPTX-DISABLED2: [[FOR_BODY]]: ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; entry: %captured_vars_addrs 
= alloca ptr, align 8, addrspace(5) @@ -1066,39 +1066,39 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__3( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__3( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__3( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__3( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED2-NEXT: ret void ; 
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__3( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__3( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -1109,9 +1109,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__3_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1122,9 +1122,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-SAME: (i16 
zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__3_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1135,9 +1135,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__3_wrapper( +; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1148,9 +1148,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__3_wrapper( +; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; 
AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1161,9 +1161,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__3_wrapper( +; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1174,9 +1174,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__3_wrapper( +; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca ptr, align 8, addrspace(5) @@ -1203,47 +1203,47 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35( +; AMDGPU-SAME: ) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label 
%[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35( +; NVPTX-SAME: ) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define weak 
ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35( +; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -1251,71 +1251,71 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: 
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; 
AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: +; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED1: [[COMMON_RET]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: +; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, 
!tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35( +; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: +; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED2: [[COMMON_RET]]: ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: +; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr 
addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35( +; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -1323,66 +1323,66 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; 
NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: +; NVPTX-DISABLED1: 
[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; 
NVPTX-DISABLED1: common.ret: +; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED1: [[COMMON_RET]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: +; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35( +; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: +; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED2: [[COMMON_RET]]: ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: +; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -1406,131 +1406,131 @@ user_code.entry: ; preds = %entry } define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__4( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-NEXT: [[ENTRY:.*]]: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: -; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; AMDGPU: [[FOR_BODY]]: +; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] ; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr 
@__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__4( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-NEXT: [[ENTRY:.*]]: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: -; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; NVPTX: [[FOR_BODY]]: +; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] ; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] 
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__4( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED1: [[FOR_COND]]: +; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: +; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast 
(ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; AMDGPU-DISABLED1: [[FOR_BODY]]: +; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__4( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED2: [[FOR_COND]]: +; AMDGPU-DISABLED2-NEXT: 
[[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: for.cond.cleanup: +; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; AMDGPU-DISABLED2: [[FOR_BODY]]: +; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__4( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED1: [[FOR_COND]]: +; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: +; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; NVPTX-DISABLED1: [[FOR_BODY]]: +; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; 
NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__4( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED2: [[FOR_COND]]: +; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: +; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] +; NVPTX-DISABLED2: [[FOR_BODY]]: +; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, 
!tbaa [[ANYPTR_TBAA20:![0-9]+]] ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; entry: %captured_vars_addrs = alloca ptr, align 8, addrspace(5) @@ -1557,57 +1557,57 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-LABEL: define internal void @__omp_outlined__5( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr 
noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-LABEL: define internal void @__omp_outlined__5( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__5( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-DISABLED2-NEXT: 
entry: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__5( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__5( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-DISABLED2-NEXT: 
entry: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__5( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -1621,9 +1621,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__5_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1632,13 +1632,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; 
AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__5_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1647,13 +1647,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__5_wrapper( +; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, 
addrspace(5) @@ -1662,13 +1662,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__5_wrapper( +; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1677,13 +1677,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; 
AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__5_wrapper( +; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1692,13 +1692,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__5_wrapper( +; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; 
NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -1707,7 +1707,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -1729,47 +1729,47 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50( +; AMDGPU-SAME: ) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; AMDGPU-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50( +; NVPTX-SAME: ) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 
[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50( +; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -1777,71 +1777,71 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label 
[[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: 
worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU-DISABLED1: 
[[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: +; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED1: [[COMMON_RET]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: +; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50( +; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = 
addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: +; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED2: [[COMMON_RET]]: ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: +; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50( +; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -1849,66 +1849,66 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load 
ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 true, label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: +; NVPTX-DISABLED1-NEXT: br label 
%[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1]]: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: +; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED1: [[COMMON_RET]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: +; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; 
NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50( +; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: +; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED2: [[COMMON_RET]]: ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: +; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] +; 
NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -1932,163 +1932,163 @@ user_code.entry: ; preds = %entry } define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__6( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]] -; AMDGPU: region.check.tid: +; AMDGPU-NEXT: br label %[[REGION_CHECK_TID:.*]] +; AMDGPU: [[REGION_CHECK_TID]]: ; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() ; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] -; AMDGPU: region.guarded: -; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] -; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]] -; AMDGPU: region.guarded.end: -; AMDGPU-NEXT: br label [[REGION_BARRIER]] -; AMDGPU: region.barrier: +; AMDGPU-NEXT: br i1 [[TMP1]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]] +; AMDGPU: [[REGION_GUARDED]]: +; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[INT_TBAA12]] +; AMDGPU-NEXT: br label %[[REGION_GUARDED_END:.*]] +; AMDGPU: [[REGION_GUARDED_END]]: +; AMDGPU-NEXT: br label %[[REGION_BARRIER]] +; AMDGPU: [[REGION_BARRIER]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]]) -; 
AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] -; AMDGPU: region.exit: -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-NEXT: br label %[[REGION_EXIT:.*]] +; AMDGPU: [[REGION_EXIT]]: +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[REGION_EXIT]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: -; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; AMDGPU: [[FOR_BODY]]: +; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-NEXT: 
entry: +; NVPTX-LABEL: define internal void @__omp_outlined__6( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]] -; NVPTX: region.check.tid: +; NVPTX-NEXT: br label %[[REGION_CHECK_TID:.*]] +; NVPTX: [[REGION_CHECK_TID]]: ; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() ; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] -; NVPTX: region.guarded: -; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] -; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]] -; NVPTX: region.guarded.end: -; NVPTX-NEXT: br label [[REGION_BARRIER]] -; NVPTX: region.barrier: +; NVPTX-NEXT: br i1 [[TMP1]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]] +; NVPTX: [[REGION_GUARDED]]: +; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[INT_TBAA12]] +; NVPTX-NEXT: br label %[[REGION_GUARDED_END:.*]] +; NVPTX: [[REGION_GUARDED_END]]: +; NVPTX-NEXT: br label %[[REGION_BARRIER]] +; NVPTX: [[REGION_BARRIER]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] -; NVPTX: region.exit: -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-NEXT: br label %[[REGION_EXIT:.*]] +; NVPTX: [[REGION_EXIT]]: +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[REGION_EXIT]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; 
NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: -; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; NVPTX: [[FOR_BODY]]: +; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__6( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; 
AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[INT_TBAA12]] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED1: [[FOR_COND]]: +; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: +; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED1: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED1: [[FOR_BODY]]: +; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw 
i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; AMDGPU-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__6( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*]]: ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[INT_TBAA12]] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU-DISABLED2: [[FOR_COND]]: +; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: for.cond.cleanup: +; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU-DISABLED2: [[FOR_COND_CLEANUP]]: ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr 
addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED2: [[FOR_BODY]]: +; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; AMDGPU-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__6( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr 
addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[INT_TBAA12]] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED1: [[FOR_COND]]: +; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: +; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED1: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED1: [[FOR_BODY]]: +; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; NVPTX-DISABLED1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -; NVPTX-DISABLED2-NEXT: entry: +; 
NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__6( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*]]: ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[INT_TBAA12]] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND:.*]] +; NVPTX-DISABLED2: [[FOR_COND]]: +; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: +; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX-DISABLED2: [[FOR_COND_CLEANUP]]: ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED2: [[FOR_BODY]]: +; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5) -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr 
addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; NVPTX-DISABLED2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; entry: %captured_vars_addrs = alloca ptr, align 8, addrspace(5) @@ -2116,57 +2116,57 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-LABEL: define internal void @__omp_outlined__7( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, 
!tbaa [[TBAA12]] +; NVPTX-LABEL: define internal void @__omp_outlined__7( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__7( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] +; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-LABEL: define internal 
void @__omp_outlined__7( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] +; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__7( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] +; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; 
NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__7( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] +; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -2180,9 +2180,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__7_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -2191,13 +2191,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr 
[[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__7_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -2206,13 +2206,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__7_wrapper( +; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -2221,13 +2221,13 @@ define internal void 
@__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__7_wrapper( +; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -2236,13 +2236,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], 
ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__7_wrapper( +; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -2251,13 +2251,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__7_wrapper( +; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) 
[[DOTADDR1]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -2266,7 +2266,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -2288,9 +2288,9 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65( +; AMDGPU-SAME: ) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -2298,47 +2298,47 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 
[[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU: is_worker_check: +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU: [[IS_WORKER_CHECK]]: ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-NEXT: ret void -; AMDGPU: worker_state_machine.is_active.check: -; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; 
AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65( +; NVPTX-SAME: ) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -2346,46 +2346,46 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX: is_worker_check: +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX: [[IS_WORKER_CHECK]]: ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]: 
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-NEXT: ret void -; NVPTX: worker_state_machine.is_active.check: -; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX: worker_state_machine.parallel_region.fallback.execute: +; NVPTX: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: br label 
[[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX: thread.user_code.check: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65( +; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -2393,65 +2393,65 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; 
AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU-DISABLED1: 
[[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: +; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label 
%[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED1: [[COMMON_RET]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: +; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65( +; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: +; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED2: [[COMMON_RET]]: ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: +; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]: ; 
AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65( +; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -2459,60 +2459,60 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-DISABLED1-NEXT: 
[[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; 
NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: +; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED1: [[COMMON_RET]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: +; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { 
-; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65( +; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: +; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED2: [[COMMON_RET]]: ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: +; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -2536,39 +2536,39 @@ user_code.entry: ; preds = %entry } define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__8( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__8( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__8( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__8( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED1-NEXT: 
entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__8( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__8( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -2579,255 +2579,255 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74( +; AMDGPU-SAME: ) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU: is_worker_check: +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label 
%[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU: [[IS_WORKER_CHECK]]: ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-NEXT: ret void -; AMDGPU: worker_state_machine.is_active.check: -; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU: worker_state_machine.parallel_region.check: +; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label 
%[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: ; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID -; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU: worker_state_machine.parallel_region.execute: +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] ; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74( +; NVPTX-SAME: ) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX: is_worker_check: +; NVPTX-NEXT: br i1 
[[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX: [[IS_WORKER_CHECK]]: ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-NEXT: ret void -; NVPTX: worker_state_machine.is_active.check: -; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX: worker_state_machine.parallel_region.check: +; NVPTX: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX: 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: ; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID -; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX: worker_state_machine.parallel_region.execute: +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX: worker_state_machine.parallel_region.fallback.execute: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX: thread.user_code.check: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] ; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74( +; AMDGPU-DISABLED1-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: +; 
AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU-DISABLED1: [[IS_WORKER_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: +; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 
[[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: +; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; 
AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: +; AMDGPU-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: +; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED1: [[COMMON_RET]]: ; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: +; AMDGPU-DISABLED1: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: +; 
AMDGPU-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74( +; AMDGPU-DISABLED2-SAME: ) #[[ATTR0]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: +; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU-DISABLED2: [[COMMON_RET]]: ; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: +; AMDGPU-DISABLED2: [[USER_CODE_ENTRY]]: ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] +; AMDGPU-DISABLED2-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define weak 
ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74( +; NVPTX-DISABLED1-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX-DISABLED1: [[IS_WORKER_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: +; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = 
load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: +; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE]]: ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: 
worker_state_machine.parallel_region.fallback.execute: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX-DISABLED1: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: +; NVPTX-DISABLED1-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX-DISABLED1: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: +; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED1: [[COMMON_RET]]: ; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: +; NVPTX-DISABLED1: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED1-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74( +; NVPTX-DISABLED2-SAME: ) #[[ATTR0]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: +; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX-DISABLED2: [[COMMON_RET]]: ; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: +; NVPTX-DISABLED2: [[USER_CODE_ENTRY]]: ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr 
[[TMP2]]) #[[ATTR4]] ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] +; NVPTX-DISABLED2-NEXT: br label %[[COMMON_RET]] ; entry: %captured_vars_addrs = alloca ptr, align 8, addrspace(5) @@ -2850,39 +2850,39 @@ user_code.entry: ; preds = %entry ; Function Attrs: alwaysinline convergent nounwind define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #2 { -; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @.omp_outlined.( +; AMDGPU-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. 
-; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @.omp_outlined.( +; NVPTX-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @.omp_outlined.( +; AMDGPU-DISABLED1-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined. 
-; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @.omp_outlined.( +; AMDGPU-DISABLED2-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @.omp_outlined.( +; NVPTX-DISABLED1-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined. 
-; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @.omp_outlined.( +; NVPTX-DISABLED2-SAME: i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -2925,28 +2925,28 @@ declare void @unknowni32p(ptr) #7 declare void @llvm.lifetime.start.p0(ptr captures(none)) #8 define weak i32 @__kmpc_target_init(ptr %0, ptr %1) { -; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; AMDGPU-LABEL: define weak i32 @__kmpc_target_init( +; AMDGPU-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; AMDGPU-NEXT: ret i32 0 ; -; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; NVPTX-LABEL: define weak i32 @__kmpc_target_init( +; NVPTX-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; NVPTX-NEXT: ret i32 0 ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; AMDGPU-DISABLED1-LABEL: define weak i32 @__kmpc_target_init( +; AMDGPU-DISABLED1-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; AMDGPU-DISABLED1-NEXT: ret i32 0 ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; AMDGPU-DISABLED2-LABEL: define weak i32 @__kmpc_target_init( +; AMDGPU-DISABLED2-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; AMDGPU-DISABLED2-NEXT: ret i32 0 ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init -; 
NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; NVPTX-DISABLED1-LABEL: define weak i32 @__kmpc_target_init( +; NVPTX-DISABLED1-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; NVPTX-DISABLED1-NEXT: ret i32 0 ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; NVPTX-DISABLED2-LABEL: define weak i32 @__kmpc_target_init( +; NVPTX-DISABLED2-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; NVPTX-DISABLED2-NEXT: ret i32 0 ; ret i32 0 @@ -2969,39 +2969,39 @@ declare i32 @__kmpc_global_thread_num(ptr) #3 declare void @__kmpc_target_deinit() define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__9( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__9( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__9( +; AMDGPU-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: call void @unknown() 
#[[ATTR8]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__9( +; AMDGPU-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__9( +; NVPTX-DISABLED1-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__9( +; NVPTX-DISABLED2-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED2-NEXT: ret void ; @@ -3012,9 +3012,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__9_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; 
AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -3025,9 +3025,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__9_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -3038,9 +3038,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: +; AMDGPU-DISABLED1-LABEL: define internal void @__omp_outlined__9_wrapper( +; AMDGPU-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED1-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -3051,9 +3051,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 { ; 
AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void ; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: +; AMDGPU-DISABLED2-LABEL: define internal void @__omp_outlined__9_wrapper( +; AMDGPU-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-DISABLED2-NEXT: [[ENTRY:.*:]] ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -3064,9 +3064,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void ; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: +; NVPTX-DISABLED1-LABEL: define internal void @__omp_outlined__9_wrapper( +; NVPTX-DISABLED1-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED1-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -3077,9 +3077,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]] ; NVPTX-DISABLED1-NEXT: ret void ; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: +; NVPTX-DISABLED2-LABEL: define internal void @__omp_outlined__9_wrapper( +; NVPTX-DISABLED2-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-DISABLED2-NEXT: [[ENTRY:.*:]] ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -3237,7 +3237,7 @@ attributes #9 = { alwaysinline } ; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"} @@ -3245,7 +3245,7 @@ attributes #9 = { alwaysinline } ; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"} ; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"} ; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} ; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} @@ -3262,7 +3262,7 @@ attributes #9 = { alwaysinline } ; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} 
+; NVPTX: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"} @@ -3270,7 +3270,7 @@ attributes #9 = { alwaysinline } ; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"} ; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"} ; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} ; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} @@ -3287,7 +3287,7 @@ attributes #9 = { alwaysinline } ; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU-DISABLED1: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"} @@ -3295,7 +3295,7 @@ attributes #9 = { alwaysinline } ; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"} ; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"} ; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU-DISABLED1: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], 
[[META18]]} ; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} @@ -3312,7 +3312,7 @@ attributes #9 = { alwaysinline } ; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU-DISABLED2: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"} @@ -3320,7 +3320,7 @@ attributes #9 = { alwaysinline } ; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"} ; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"} ; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU-DISABLED2: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} ; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} @@ -3337,7 +3337,7 @@ attributes #9 = { alwaysinline } ; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; NVPTX-DISABLED1: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ 
TBAA"} @@ -3345,7 +3345,7 @@ attributes #9 = { alwaysinline } ; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"} ; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"} ; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX-DISABLED1: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} ; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} @@ -3362,7 +3362,7 @@ attributes #9 = { alwaysinline } ; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; NVPTX-DISABLED2: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"} @@ -3370,7 +3370,7 @@ attributes #9 = { alwaysinline } ; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"} ; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"} ; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX-DISABLED2: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} ; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll 
b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index 59e2499ead2ad..60d42ed931e76 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s ; void foo(double x) { @@ -29,35 +29,35 @@ target triple = "nvptx64" ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. define weak ptx_kernel void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 -; CHECK-SAME: (ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define weak ptx_kernel void @__omp_offloading_fd02_404433c2_main_l5( +; CHECK-SAME: ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment, ptr [[DYN]]) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; CHECK: common.ret: +; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; CHECK: [[COMMON_RET]]: ; CHECK-NEXT: ret void -; CHECK: user_code.entry: +; CHECK: [[USER_CODE_ENTRY]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR3]] ; CHECK-NEXT: [[CALL_I:%.*]] = call double @__nv_sin(double 0x400921FB54442D18) #[[ATTR7:[0-9]+]] -; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] -; CHECK: region.check.tid: +; CHECK-NEXT: br label %[[REGION_CHECK_TID:.*]] +; CHECK: [[REGION_CHECK_TID]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] -; CHECK: region.guarded: -; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA7:![0-9]+]] -; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] -; CHECK: region.guarded.end: -; CHECK-NEXT: br label [[REGION_BARRIER]] -; CHECK: region.barrier: +; CHECK-NEXT: br i1 [[TMP3]], label %[[REGION_GUARDED:.*]], label %[[REGION_BARRIER:.*]] +; CHECK: [[REGION_GUARDED]]: +; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[DOUBLE_TBAA7:![0-9]+]] +; CHECK-NEXT: br label %[[REGION_GUARDED_END:.*]] +; CHECK: [[REGION_GUARDED_END]]: +; CHECK-NEXT: br label %[[REGION_BARRIER]] +; CHECK: [[REGION_BARRIER]]: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]]) -; CHECK-NEXT: br label [[REGION_EXIT:%.*]] -; CHECK: region.exit: +; CHECK-NEXT: br label %[[REGION_EXIT:.*]] +; CHECK: [[REGION_EXIT]]: ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR3]] -; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK-NEXT: br label %[[COMMON_RET]] ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 @@ -81,9 +81,9 @@ declare i32 @__kmpc_target_init(ptr, ptr) local_unnamed_addr ; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn define internal void 
@__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 { -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-SAME: (ptr noalias captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias captures(none) [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define internal void @__omp_outlined__( +; CHECK-SAME: ptr noalias captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias captures(none) [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: ret void ; entry: @@ -92,9 +92,9 @@ entry: ; Function Attrs: norecurse nounwind define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 { -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define internal void @__omp_outlined___wrapper( +; CHECK-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]]) #[[ATTR3]] ; CHECK-NEXT: ret void @@ -158,7 +158,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } ; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[DOUBLE_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} ; CHECK: [[META8]] = !{!"double", [[META9:![0-9]+]], i64 0} ; CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0} ; CHECK: [[META10]] = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll index d1e006a704441..dec6a68478f09 100644 --- 
a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX @@ -30,13 +30,13 @@ ; NVPTX: @spmd_and_non_spmd_callee_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. define weak ptx_kernel void @spmd_callees(i1 %c) #0 { -; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees -; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; AMDGPU-LABEL: define weak ptx_kernel void @spmd_callees( +; AMDGPU-SAME: i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; AMDGPU-NEXT: call void @spmd_callees__debug(i1 [[C]]) ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@spmd_callees -; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; NVPTX-LABEL: define weak ptx_kernel void @spmd_callees( +; NVPTX-SAME: i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; NVPTX-NEXT: call void @spmd_callees__debug(i1 [[C]]) ; NVPTX-NEXT: ret void ; @@ -45,71 +45,71 @@ define weak ptx_kernel void @spmd_callees(i1 %c) #0 { } define internal void @spmd_callees__debug(i1 %c) { -; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees__debug -; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @spmd_callees__debug( +; AMDGPU-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast 
ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]] ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2 ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2 -; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; AMDGPU: 3: +; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; AMDGPU: [[BB3]]: ; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; AMDGPU-NEXT: br label [[TMP7:%.*]] -; AMDGPU: 4: -; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]] -; AMDGPU: 5: +; AMDGPU-NEXT: br label %[[BB7:.*]] +; AMDGPU: [[BB4]]: +; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; AMDGPU: [[BB5]]: ; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr 
[[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; AMDGPU-NEXT: br label [[TMP7]] -; AMDGPU: 6: +; AMDGPU-NEXT: br label %[[BB7]] +; AMDGPU: [[BB6]]: ; AMDGPU-NEXT: unreachable -; AMDGPU: 7: +; AMDGPU: [[BB7]]: ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@spmd_callees__debug -; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @spmd_callees__debug( +; NVPTX-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]] ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2 ; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], 
@__omp_outlined_spmd_amenable2 -; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; NVPTX: 3: +; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; NVPTX: [[BB3]]: ; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; NVPTX-NEXT: br label [[TMP7:%.*]] -; NVPTX: 4: -; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]] -; NVPTX: 5: +; NVPTX-NEXT: br label %[[BB7:.*]] +; NVPTX: [[BB4]]: +; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; NVPTX: [[BB5]]: ; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; NVPTX-NEXT: br label [[TMP7]] -; NVPTX: 6: +; NVPTX-NEXT: br label %[[BB7]] +; NVPTX: [[BB6]]: ; NVPTX-NEXT: unreachable -; NVPTX: 7: +; NVPTX: [[BB7]]: ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -134,43 +134,43 @@ user_code.entry: ; preds = %entry } define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined_spmd_amenable1( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*]]: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; AMDGPU: [[FOR_BODY]]: +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias 
[[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined_spmd_amenable1( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*]]: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; NVPTX: [[FOR_BODY]]: +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; entry: %captured_vars_addrs = alloca ptr, align 8, addrspace(5) @@ -194,15 +194,15 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__1( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__1( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; NVPTX-NEXT: ret void ; @@ -213,9 +213,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__1_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -226,9 +226,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void 
@__omp_outlined__1_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -254,48 +254,48 @@ entry: } define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined_spmd_amenable2( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*]]: ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR6]] -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: -; 
AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; AMDGPU: [[FOR_BODY]]: +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined_spmd_amenable2( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*]]: ; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR6]] -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; NVPTX: [[FOR_BODY]]: +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; entry: %captured_vars_addrs = alloca ptr, align 8, addrspace(5) @@ -322,15 +322,15 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__3( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__3( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; @@ -341,9 +341,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__3_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 
[[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -354,9 +354,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__3_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -383,9 +383,9 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { -; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee -; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callee( +; AMDGPU-SAME: i1 [[C:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -393,62 +393,62 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@spmd_and_non_spmd_callee_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU: is_worker_check: +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU: [[IS_WORKER_CHECK]]: ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-NEXT: ret void -; AMDGPU: 
worker_state_machine.is_active.check: -; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: 
store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable -; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; AMDGPU: 3: +; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; AMDGPU: [[BB3]]: ; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; AMDGPU-NEXT: br label [[TMP7:%.*]] -; AMDGPU: 4: -; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]] -; AMDGPU: 5: +; AMDGPU-NEXT: br label %[[BB7:.*]] +; AMDGPU: [[BB4]]: +; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; AMDGPU: [[BB5]]: ; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; AMDGPU-NEXT: br label [[TMP7]] -; AMDGPU: 6: +; AMDGPU-NEXT: br label %[[BB7]] +; AMDGPU: [[BB6]]: ; AMDGPU-NEXT: unreachable -; AMDGPU: 7: +; AMDGPU: [[BB7]]: ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee -; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callee( +; NVPTX-SAME: i1 [[C:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -456,57 +456,57 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { ; NVPTX-NEXT: 
[[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX: is_worker_check: +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX: [[IS_WORKER_CHECK]]: ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-NEXT: ret void -; NVPTX: 
worker_state_machine.is_active.check: -; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX: worker_state_machine.parallel_region.fallback.execute: +; NVPTX: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX: thread.user_code.check: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr 
addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable ; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable -; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; NVPTX: 3: +; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; NVPTX: [[BB3]]: ; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; NVPTX-NEXT: br label [[TMP7:%.*]] -; NVPTX: 4: -; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]] -; NVPTX: 5: +; NVPTX-NEXT: br label %[[BB7:.*]] +; NVPTX: [[BB4]]: +; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; NVPTX: [[BB5]]: ; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] -; NVPTX-NEXT: br label [[TMP7]] -; NVPTX: 6: +; NVPTX-NEXT: br label %[[BB7]] +; NVPTX: [[BB6]]: ; NVPTX-NEXT: unreachable -; NVPTX: 7: +; NVPTX: [[BB7]]: ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -531,49 +531,49 @@ user_code.entry: ; preds = %entry } define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined_spmd_amenable3( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*]]: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; AMDGPU-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]] -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: -; AMDGPU-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; AMDGPU: [[FOR_BODY]]: +; AMDGPU-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr 
@__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined_spmd_amenable3( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*]]: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr ; NVPTX-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]] -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: -; NVPTX-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; NVPTX: [[FOR_BODY]]: +; NVPTX-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]] +; NVPTX-NEXT: 
[[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; entry: %captured_vars_addrs = alloca ptr, align 8, addrspace(5) @@ -600,21 +600,21 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-LABEL: define internal void @__omp_outlined__5( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-LABEL: define internal void @__omp_outlined__5( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias 
[[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; @@ -628,9 +628,9 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__5_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -639,13 +639,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR10]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-NEXT: entry: +; 
NVPTX-LABEL: define internal void @__omp_outlined__5_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -654,7 +654,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 { ; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[ANYPTR_TBAA20]] ; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -676,45 +676,45 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 { -; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees_metadata -; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @spmd_callees_metadata( +; AMDGPU-SAME: ptr [[FP:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 
[[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@spmd_callees_metadata -; NVPTX-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @spmd_callees_metadata( +; NVPTX-SAME: ptr [[FP:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: 
[[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -739,9 +739,9 @@ user_code.entry: ; preds = %entry ; Function Attrs: alwaysinline convergent norecurse nounwind define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { -; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata -; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata( +; AMDGPU-SAME: ptr [[FP:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -749,61 +749,61 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label 
[[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU: is_worker_check: +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; AMDGPU: [[IS_WORKER_CHECK]]: ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_BEGIN]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_FINISHED]]: ; AMDGPU-NEXT: ret void -; AMDGPU: worker_state_machine.is_active.check: -; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU: 
worker_state_machine.parallel_region.fallback.execute: +; AMDGPU: [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; AMDGPU: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: [[THREAD_USER_CODE_CHECK]]: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU: common.ret: +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; AMDGPU: [[COMMON_RET]]: ; AMDGPU-NEXT: ret void -; AMDGPU: user_code.entry: +; AMDGPU: [[USER_CODE_ENTRY]]: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: 
[[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external -; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; AMDGPU: 3: +; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; AMDGPU: [[BB3]]: ; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) -; AMDGPU-NEXT: br label [[TMP7:%.*]] -; AMDGPU: 4: -; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]] -; AMDGPU: 5: +; AMDGPU-NEXT: br label %[[BB7:.*]] +; AMDGPU: [[BB4]]: +; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; AMDGPU: [[BB5]]: ; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) -; AMDGPU-NEXT: br label [[TMP7]] -; AMDGPU: 6: +; AMDGPU-NEXT: br label %[[BB7]] +; AMDGPU: [[BB6]]: ; AMDGPU-NEXT: unreachable -; AMDGPU: 7: +; AMDGPU: [[BB7]]: ; AMDGPU-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-NEXT: br label [[COMMON_RET]] +; AMDGPU-NEXT: br label %[[COMMON_RET]] ; -; NVPTX-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata -; NVPTX-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata( +; NVPTX-SAME: ptr [[FP:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr @@ -811,56 +811,56 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; 
NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX: is_worker_check: +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label %[[IS_WORKER_CHECK:.*]], label %[[THREAD_USER_CODE_CHECK:.*]] +; NVPTX: [[IS_WORKER_CHECK]]: ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label %[[WORKER_STATE_MACHINE_BEGIN:.*]], label %[[WORKER_STATE_MACHINE_FINISHED:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_BEGIN]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label %[[WORKER_STATE_MACHINE_FINISHED]], label %[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_FINISHED]]: ; NVPTX-NEXT: ret void -; NVPTX: worker_state_machine.is_active.check: -; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX: worker_state_machine.parallel_region.fallback.execute: +; NVPTX: 
[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK]]: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:.*]], label %[[WORKER_STATE_MACHINE_DONE_BARRIER:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE]]: ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:.*]] +; NVPTX: [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]: ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: [[WORKER_STATE_MACHINE_DONE_BARRIER]]: ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX: thread.user_code.check: +; NVPTX-NEXT: br label %[[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: [[THREAD_USER_CODE_CHECK]]: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX: common.ret: +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[COMMON_RET:.*]] +; NVPTX: [[COMMON_RET]]: ; NVPTX-NEXT: ret void -; NVPTX: user_code.entry: +; NVPTX: [[USER_CODE_ENTRY]]: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external -; NVPTX-NEXT: br i1 
[[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; NVPTX: 3: +; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; NVPTX: [[BB3]]: ; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) -; NVPTX-NEXT: br label [[TMP7:%.*]] -; NVPTX: 4: -; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]] -; NVPTX: 5: +; NVPTX-NEXT: br label %[[BB7:.*]] +; NVPTX: [[BB4]]: +; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; NVPTX: [[BB5]]: ; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) -; NVPTX-NEXT: br label [[TMP7]] -; NVPTX: 6: +; NVPTX-NEXT: br label %[[BB7]] +; NVPTX: [[BB6]]: ; NVPTX-NEXT: unreachable -; NVPTX: 7: +; NVPTX: [[BB7]]: ; NVPTX-NEXT: call void @__kmpc_target_deinit() -; NVPTX-NEXT: br label [[COMMON_RET]] +; NVPTX-NEXT: br label %[[COMMON_RET]] ; entry: %.zero.addr = alloca ptr, align 8, addrspace(5) @@ -884,39 +884,39 @@ user_code.entry: ; preds = %entry } define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU: for.cond: -; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; AMDGPU-LABEL: define void @__omp_outlined_spmd_amenable_external( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*]]: +; AMDGPU-NEXT: br label %[[FOR_COND:.*]] +; AMDGPU: [[FOR_COND]]: +; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU: for.cond.cleanup: +; AMDGPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; AMDGPU: [[FOR_COND_CLEANUP]]: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void -; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; AMDGPU: [[FOR_BODY]]: +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: br label [[FOR_COND:%.*]] -; NVPTX: for.cond: -; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; NVPTX-LABEL: 
define void @__omp_outlined_spmd_amenable_external( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*]]: +; NVPTX-NEXT: br label %[[FOR_COND:.*]] +; NVPTX: [[FOR_COND]]: +; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX: for.cond.cleanup: +; NVPTX-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]] +; NVPTX: [[FOR_COND_CLEANUP]]: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void -; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] +; NVPTX: [[FOR_BODY]]: +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; entry: br label %for.cond @@ -938,14 +938,14 @@ for.body: ; preds = %for.cond } define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__7( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__7( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: ret void ; entry: @@ -954,14 +954,14 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 { -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined__7_wrapper( +; AMDGPU-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined__7_wrapper( +; NVPTX-SAME: i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: ret void ; entry: @@ -969,13 +969,13 @@ entry: } define void @__omp_outlined_not_spmd_amenable_external(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable_external -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-LABEL: define void @__omp_outlined_not_spmd_amenable_external( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTGLOBAL_TID_]], ptr [[DOTBOUND_TID_]]) ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable_external -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-LABEL: define void @__omp_outlined_not_spmd_amenable_external( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTGLOBAL_TID_]], ptr [[DOTBOUND_TID_]]) ; NVPTX-NEXT: ret void ; @@ -984,15 +984,15 @@ define void @__omp_outlined_not_spmd_amenable_external(ptr noalias %.global_tid. } define internal void @__omp_outlined_not_spmd_amenable(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; AMDGPU-NEXT: entry: +; AMDGPU-LABEL: define internal void @__omp_outlined_not_spmd_amenable( +; AMDGPU-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; AMDGPU-NEXT: [[ENTRY:.*:]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { -; NVPTX-NEXT: entry: +; NVPTX-LABEL: define internal void @__omp_outlined_not_spmd_amenable( +; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { +; NVPTX-NEXT: [[ENTRY:.*:]] ; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; @@ -1020,12 +1020,12 @@ declare void @unknowni32p(ptr) #5 declare void @llvm.lifetime.start.p0(ptr captures(none)) #6 define weak i32 @__kmpc_target_init(ptr %0, ptr %1) { -; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; AMDGPU-LABEL: define weak i32 @__kmpc_target_init( +; AMDGPU-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; AMDGPU-NEXT: ret i32 0 ; -; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; NVPTX-LABEL: define weak i32 @__kmpc_target_init( +; NVPTX-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; NVPTX-NEXT: ret i32 0 ; ret i32 0 @@ -1150,7 +1150,7 @@ attributes #8 = { nounwind } ; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; AMDGPU: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; AMDGPU: 
[[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"} @@ -1158,7 +1158,7 @@ attributes #8 = { nounwind } ; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"} ; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"} ; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; AMDGPU: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} ; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} @@ -1175,7 +1175,7 @@ attributes #8 = { nounwind } ; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} ; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +; NVPTX: [[INT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} ; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0} ; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0} ; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"} @@ -1183,7 +1183,7 @@ attributes #8 = { nounwind } ; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"} ; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"} ; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]} -; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} +; NVPTX: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} ; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll index 1fe3fde61f410..92e625deb11b1 100644 --- 
a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -O3 < %s | FileCheck %s ; Check unrolling / SLP vectorization where the order of lanes is important for @@ -11,9 +11,9 @@ target triple = "aarch64" ; Function Attrs: nounwind uwtable define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 noundef %ip2) #0 { -; CHECK-LABEL: define range(i32 0, 65536) i32 @slpordering -; CHECK-SAME: (ptr noundef readonly captures(none) [[P1:%.*]], i32 noundef [[IP1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], i32 noundef [[IP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define range(i32 0, 65536) i32 @slpordering( +; CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], i32 noundef [[IP1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], i32 noundef [[IP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[IP1]] to i64 ; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[IP2]] to i64 ; CHECK-NEXT: [[RRRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4 @@ -30,26 +30,26 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 ; CHECK-NEXT: [[RDD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64_1]], i64 [[IDX_EXT63]] ; CHECK-NEXT: [[RRRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_2]], i64 4 ; CHECK-NEXT: [[RRRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_2]], i64 4 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x 
i8>, ptr [[RRRAYIDX3]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[RDD_PTR]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[RDD_PTR64]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_1]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_1]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i8>, ptr [[RDD_PTR_1]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_1]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_2]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_2]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[RDD_PTR_2]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[RDD_PTR]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[RDD_PTR64]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_1]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_1]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i8>, ptr [[RDD_PTR_1]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_1]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_2]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: 
[[TMP11:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_2]], align 1, !tbaa [[CHAR_TBAA0]] +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[RDD_PTR_2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_2]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> @@ -57,14 +57,14 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_3]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_3]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> ; 
CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32> -; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_3]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_3]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> @@ -482,3 +482,8 @@ attributes #2 = { nounwind } !11 = distinct !{!11, !12} !12 = !{!"llvm.loop.mustprogress"} !13 = distinct !{!13, !12} +;. +; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll index 0967736b6740a..4c7e39d31b5c6 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -O3 < %s | FileCheck %s --check-prefixes=CHECK-O3 ; RUN: opt -S -passes="default,default" < %s | FileCheck %s --check-prefixes=CHECK-LTO @@ -11,9 +11,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[ENTRY:.*:]] ; CHECK-O3-NEXT: [[IDX_EXT8:%.*]] = sext i32 [[S_P2]] to i64 ; CHECK-O3-NEXT: [[IDX_EXT:%.*]] = sext i32 [[S_P1]] to i64 -; CHECK-O3-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]] +; CHECK-O3-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]] ; CHECK-O3-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP4:%.*]] = sub nsw <16 x i16> [[TMP1]], [[TMP3]] ; CHECK-O3-NEXT: [[TMP5:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP4]], i1 false) @@ -21,9 +21,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]]) ; CHECK-O3-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP8:%.*]] = load <16 x 
i8>, ptr [[ADD_PTR]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[ADD_PTR]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[TMP8]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP12:%.*]] = sub nsw <16 x i16> [[TMP9]], [[TMP11]] ; CHECK-O3-NEXT: [[TMP13:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP12]], i1 false) @@ -32,9 +32,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_1:%.*]] = add i32 [[TMP15]], [[TMP7]] ; CHECK-O3-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP16:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP16:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP17:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP20:%.*]] = sub nsw <16 x i16> [[TMP17]], [[TMP19]] ; CHECK-O3-NEXT: [[TMP21:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP20]], i1 false) @@ -43,9 +43,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_2:%.*]] = add i32 [[TMP23]], [[OP_RDX_1]] ; CHECK-O3-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: 
[[ADD_PTR9_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_1]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP28:%.*]] = sub nsw <16 x i16> [[TMP25]], [[TMP27]] ; CHECK-O3-NEXT: [[TMP29:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP28]], i1 false) @@ -54,9 +54,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_3:%.*]] = add i32 [[TMP31]], [[OP_RDX_2]] ; CHECK-O3-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_2]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP34:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP34:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[TMP34]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP36:%.*]] = sub nsw <16 x i16> [[TMP33]], [[TMP35]] ; CHECK-O3-NEXT: [[TMP37:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP36]], i1 false) @@ -65,9 +65,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_4:%.*]] 
= add i32 [[TMP39]], [[OP_RDX_3]] ; CHECK-O3-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_3]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_3]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP40:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP40:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP41:%.*]] = zext <16 x i8> [[TMP40]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP42:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP42:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP44:%.*]] = sub nsw <16 x i16> [[TMP41]], [[TMP43]] ; CHECK-O3-NEXT: [[TMP45:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP44]], i1 false) @@ -76,9 +76,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_5:%.*]] = add i32 [[TMP47]], [[OP_RDX_4]] ; CHECK-O3-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_4]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_4]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP48:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP48:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP49:%.*]] = zext <16 x i8> [[TMP48]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP50:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP50:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP51:%.*]] = zext <16 x i8> [[TMP50]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP52:%.*]] = sub nsw <16 x i16> [[TMP49]], [[TMP51]] ; CHECK-O3-NEXT: [[TMP53:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> 
[[TMP52]], i1 false) @@ -87,9 +87,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_6:%.*]] = add i32 [[TMP55]], [[OP_RDX_5]] ; CHECK-O3-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_5]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_5]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP56:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP56:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP57:%.*]] = zext <16 x i8> [[TMP56]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP58:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_6]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP58:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_6]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP59:%.*]] = zext <16 x i8> [[TMP58]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP60:%.*]] = sub nsw <16 x i16> [[TMP57]], [[TMP59]] ; CHECK-O3-NEXT: [[TMP61:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP60]], i1 false) @@ -98,9 +98,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_7:%.*]] = add i32 [[TMP63]], [[OP_RDX_6]] ; CHECK-O3-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_6]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_6]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP64:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP64:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP65:%.*]] = zext <16 x i8> [[TMP64]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP66:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP66:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP67:%.*]] = zext <16 x i8> [[TMP66]] to <16 x i16> ; 
CHECK-O3-NEXT: [[TMP68:%.*]] = sub nsw <16 x i16> [[TMP65]], [[TMP67]] ; CHECK-O3-NEXT: [[TMP69:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP68]], i1 false) @@ -109,9 +109,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_8:%.*]] = add i32 [[TMP71]], [[OP_RDX_7]] ; CHECK-O3-NEXT: [[ADD_PTR_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_7]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_7]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP72:%.*]] = load <16 x i8>, ptr [[ADD_PTR_8]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP72:%.*]] = load <16 x i8>, ptr [[ADD_PTR_8]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP73:%.*]] = zext <16 x i8> [[TMP72]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP74:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP74:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP75:%.*]] = zext <16 x i8> [[TMP74]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP76:%.*]] = sub nsw <16 x i16> [[TMP73]], [[TMP75]] ; CHECK-O3-NEXT: [[TMP77:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP76]], i1 false) @@ -120,9 +120,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_9:%.*]] = add i32 [[TMP79]], [[OP_RDX_8]] ; CHECK-O3-NEXT: [[ADD_PTR_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_8]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_8]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP80:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP80:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP81:%.*]] = zext <16 x i8> [[TMP80]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP82:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: 
[[TMP82:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP83:%.*]] = zext <16 x i8> [[TMP82]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP84:%.*]] = sub nsw <16 x i16> [[TMP81]], [[TMP83]] ; CHECK-O3-NEXT: [[TMP85:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP84]], i1 false) @@ -131,9 +131,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_10:%.*]] = add i32 [[TMP87]], [[OP_RDX_9]] ; CHECK-O3-NEXT: [[ADD_PTR_10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_9]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_9]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP88:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP88:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP89:%.*]] = zext <16 x i8> [[TMP88]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP90:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP90:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP91:%.*]] = zext <16 x i8> [[TMP90]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP92:%.*]] = sub nsw <16 x i16> [[TMP89]], [[TMP91]] ; CHECK-O3-NEXT: [[TMP93:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP92]], i1 false) @@ -142,9 +142,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_11:%.*]] = add i32 [[TMP95]], [[OP_RDX_10]] ; CHECK-O3-NEXT: [[ADD_PTR_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_10]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_10]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP96:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP96:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[CHAR_TBAA0]] ; 
CHECK-O3-NEXT: [[TMP97:%.*]] = zext <16 x i8> [[TMP96]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP98:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP98:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP99:%.*]] = zext <16 x i8> [[TMP98]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP100:%.*]] = sub nsw <16 x i16> [[TMP97]], [[TMP99]] ; CHECK-O3-NEXT: [[TMP101:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP100]], i1 false) @@ -153,9 +153,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_12:%.*]] = add i32 [[TMP103]], [[OP_RDX_11]] ; CHECK-O3-NEXT: [[ADD_PTR_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_11]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_11]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP104:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP104:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP105:%.*]] = zext <16 x i8> [[TMP104]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP106:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP106:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP107:%.*]] = zext <16 x i8> [[TMP106]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP108:%.*]] = sub nsw <16 x i16> [[TMP105]], [[TMP107]] ; CHECK-O3-NEXT: [[TMP109:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP108]], i1 false) @@ -164,9 +164,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_13:%.*]] = add i32 [[TMP111]], [[OP_RDX_12]] ; CHECK-O3-NEXT: [[ADD_PTR_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_12]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_12]], i64 [[IDX_EXT8]] -; 
CHECK-O3-NEXT: [[TMP112:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP112:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP113:%.*]] = zext <16 x i8> [[TMP112]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP114:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP114:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP115:%.*]] = zext <16 x i8> [[TMP114]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP116:%.*]] = sub nsw <16 x i16> [[TMP113]], [[TMP115]] ; CHECK-O3-NEXT: [[TMP117:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP116]], i1 false) @@ -175,9 +175,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-O3-NEXT: [[OP_RDX_14:%.*]] = add i32 [[TMP119]], [[OP_RDX_13]] ; CHECK-O3-NEXT: [[ADD_PTR_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_13]], i64 [[IDX_EXT]] ; CHECK-O3-NEXT: [[ADD_PTR9_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_13]], i64 [[IDX_EXT8]] -; CHECK-O3-NEXT: [[TMP120:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP120:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP121:%.*]] = zext <16 x i8> [[TMP120]] to <16 x i16> -; CHECK-O3-NEXT: [[TMP122:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[TBAA0]] +; CHECK-O3-NEXT: [[TMP122:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-O3-NEXT: [[TMP123:%.*]] = zext <16 x i8> [[TMP122]] to <16 x i16> ; CHECK-O3-NEXT: [[TMP124:%.*]] = sub nsw <16 x i16> [[TMP121]], [[TMP123]] ; CHECK-O3-NEXT: [[TMP125:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP124]], i1 false) @@ -191,9 +191,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[ENTRY:.*:]] ; CHECK-LTO-NEXT: [[IDX_EXT8:%.*]] = sext 
i32 [[S_P2]] to i64 ; CHECK-LTO-NEXT: [[IDX_EXT:%.*]] = sext i32 [[S_P1]] to i64 -; CHECK-LTO-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]] +; CHECK-LTO-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[P1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]] ; CHECK-LTO-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP4:%.*]] = sub nsw <16 x i16> [[TMP1]], [[TMP3]] ; CHECK-LTO-NEXT: [[TMP5:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP4]], i1 true) @@ -201,9 +201,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[TMP44:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP36]]) ; CHECK-LTO-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[ADD_PTR]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[ADD_PTR]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[ADD_PTR9]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[TMP8]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP10:%.*]] = sub nsw <16 x i16> [[TMP7]], [[TMP9]] ; CHECK-LTO-NEXT: [[TMP11:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP10]], i1 true) @@ -212,9 +212,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_1:%.*]] = add i32 
[[TMP60]], [[TMP44]] ; CHECK-LTO-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP12:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP12:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP13:%.*]] = zext <16 x i8> [[TMP12]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_1]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP15:%.*]] = zext <16 x i8> [[TMP14]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP16:%.*]] = sub nsw <16 x i16> [[TMP13]], [[TMP15]] ; CHECK-LTO-NEXT: [[TMP17:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP16]], i1 true) @@ -223,9 +223,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_2:%.*]] = add i32 [[OP_RDX_1]], [[TMP76]] ; CHECK-LTO-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_1]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP20:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP20:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_2]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[TMP20]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP22:%.*]] = sub nsw <16 x i16> [[TMP19]], [[TMP21]] ; CHECK-LTO-NEXT: [[TMP23:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x 
i16> [[TMP22]], i1 true) @@ -234,9 +234,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_3:%.*]] = add i32 [[OP_RDX_2]], [[TMP92]] ; CHECK-LTO-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_2]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP26:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_3]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP28:%.*]] = sub nsw <16 x i16> [[TMP25]], [[TMP27]] ; CHECK-LTO-NEXT: [[TMP29:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP28]], i1 true) @@ -245,9 +245,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_4:%.*]] = add i32 [[OP_RDX_3]], [[TMP108]] ; CHECK-LTO-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_3]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_4:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_3]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP30:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP30:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_4]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP33:%.*]] = zext <16 x i8> 
[[TMP32]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP34:%.*]] = sub nsw <16 x i16> [[TMP31]], [[TMP33]] ; CHECK-LTO-NEXT: [[TMP35:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP34]], i1 true) @@ -256,9 +256,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_5:%.*]] = add i32 [[OP_RDX_4]], [[TMP117]] ; CHECK-LTO-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_4]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_5:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_4]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP37:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP37:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP38:%.*]] = zext <16 x i8> [[TMP37]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP39:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP39:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_5]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP41:%.*]] = sub nsw <16 x i16> [[TMP38]], [[TMP40]] ; CHECK-LTO-NEXT: [[TMP42:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP41]], i1 true) @@ -267,9 +267,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_6:%.*]] = add i32 [[OP_RDX_5]], [[TMP118]] ; CHECK-LTO-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_5]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_6:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_5]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP45:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP45:%.*]] = load <16 x i8>, ptr [[ADD_PTR_6]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP46:%.*]] = zext <16 x i8> [[TMP45]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP47:%.*]] = load <16 x i8>, ptr 
[[ADD_PTR9_6]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP47:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_6]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP49:%.*]] = sub nsw <16 x i16> [[TMP46]], [[TMP48]] ; CHECK-LTO-NEXT: [[TMP50:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP49]], i1 true) @@ -278,9 +278,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_7:%.*]] = add i32 [[OP_RDX_6]], [[TMP120]] ; CHECK-LTO-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_6]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_7:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_6]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP53:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP53:%.*]] = load <16 x i8>, ptr [[ADD_PTR_7]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP54:%.*]] = zext <16 x i8> [[TMP53]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP55:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP55:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_7]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP57:%.*]] = sub nsw <16 x i16> [[TMP54]], [[TMP56]] ; CHECK-LTO-NEXT: [[TMP58:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP57]], i1 true) @@ -289,9 +289,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_8:%.*]] = add i32 [[OP_RDX_7]], [[TMP121]] ; CHECK-LTO-NEXT: [[ADD_PTR_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_7]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_8:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_7]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP61:%.*]] = load <16 x i8>, ptr [[ADD_PTR_8]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP61:%.*]] = load <16 x i8>, 
ptr [[ADD_PTR_8]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP62:%.*]] = zext <16 x i8> [[TMP61]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP63:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP63:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_8]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP64:%.*]] = zext <16 x i8> [[TMP63]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP65:%.*]] = sub nsw <16 x i16> [[TMP62]], [[TMP64]] ; CHECK-LTO-NEXT: [[TMP66:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP65]], i1 true) @@ -300,9 +300,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_9:%.*]] = add i32 [[OP_RDX_8]], [[TMP122]] ; CHECK-LTO-NEXT: [[ADD_PTR_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_8]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_9:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_8]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP69:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP69:%.*]] = load <16 x i8>, ptr [[ADD_PTR_9]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP70:%.*]] = zext <16 x i8> [[TMP69]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP71:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP71:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_9]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP72:%.*]] = zext <16 x i8> [[TMP71]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP73:%.*]] = sub nsw <16 x i16> [[TMP70]], [[TMP72]] ; CHECK-LTO-NEXT: [[TMP74:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP73]], i1 true) @@ -311,9 +311,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_10:%.*]] = add i32 [[OP_RDX_9]], [[TMP123]] ; CHECK-LTO-NEXT: [[ADD_PTR_10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_9]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_10:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_9]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP77:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP77:%.*]] = load <16 x i8>, ptr [[ADD_PTR_10]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP78:%.*]] = zext <16 x i8> [[TMP77]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP79:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP79:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_10]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP80:%.*]] = zext <16 x i8> [[TMP79]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP81:%.*]] = sub nsw <16 x i16> [[TMP78]], [[TMP80]] ; CHECK-LTO-NEXT: [[TMP82:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP81]], i1 true) @@ -322,9 +322,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_11:%.*]] = add i32 [[OP_RDX_10]], [[TMP124]] ; CHECK-LTO-NEXT: [[ADD_PTR_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_10]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_11:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_10]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP85:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP85:%.*]] = load <16 x i8>, ptr [[ADD_PTR_11]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP86:%.*]] = zext <16 x i8> [[TMP85]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP87:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP87:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_11]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP88:%.*]] = zext <16 x i8> [[TMP87]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP89:%.*]] = sub nsw <16 x i16> [[TMP86]], [[TMP88]] ; CHECK-LTO-NEXT: [[TMP90:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP89]], i1 true) @@ -333,9 +333,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_12:%.*]] = add i32 
[[OP_RDX_11]], [[TMP125]] ; CHECK-LTO-NEXT: [[ADD_PTR_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_11]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_11]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP93:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP93:%.*]] = load <16 x i8>, ptr [[ADD_PTR_12]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP94:%.*]] = zext <16 x i8> [[TMP93]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP95:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP95:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_12]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP96:%.*]] = zext <16 x i8> [[TMP95]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP97:%.*]] = sub nsw <16 x i16> [[TMP94]], [[TMP96]] ; CHECK-LTO-NEXT: [[TMP98:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP97]], i1 true) @@ -344,9 +344,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_13:%.*]] = add i32 [[OP_RDX_12]], [[TMP126]] ; CHECK-LTO-NEXT: [[ADD_PTR_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_12]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_13:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_12]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP101:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP101:%.*]] = load <16 x i8>, ptr [[ADD_PTR_13]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP102:%.*]] = zext <16 x i8> [[TMP101]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP103:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP103:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_13]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP104:%.*]] = zext <16 x i8> [[TMP103]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP105:%.*]] = sub nsw <16 x i16> [[TMP102]], [[TMP104]] ; CHECK-LTO-NEXT: [[TMP106:%.*]] = tail 
call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP105]], i1 true) @@ -355,9 +355,9 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef %s_p1, ptr noundef %p2, ; CHECK-LTO-NEXT: [[OP_RDX_14:%.*]] = add i32 [[OP_RDX_13]], [[TMP119]] ; CHECK-LTO-NEXT: [[ADD_PTR_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_13]], i64 [[IDX_EXT]] ; CHECK-LTO-NEXT: [[ADD_PTR9_14:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR9_13]], i64 [[IDX_EXT8]] -; CHECK-LTO-NEXT: [[TMP109:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP109:%.*]] = load <16 x i8>, ptr [[ADD_PTR_14]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP110:%.*]] = zext <16 x i8> [[TMP109]] to <16 x i16> -; CHECK-LTO-NEXT: [[TMP111:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[TBAA0]] +; CHECK-LTO-NEXT: [[TMP111:%.*]] = load <16 x i8>, ptr [[ADD_PTR9_14]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-LTO-NEXT: [[TMP112:%.*]] = zext <16 x i8> [[TMP111]] to <16 x i16> ; CHECK-LTO-NEXT: [[TMP113:%.*]] = sub nsw <16 x i16> [[TMP110]], [[TMP112]] ; CHECK-LTO-NEXT: [[TMP114:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP113]], i1 true) @@ -489,11 +489,11 @@ attributes #3 = { nounwind } !13 = !{!"llvm.loop.mustprogress"} !14 = distinct !{!14, !13} ;. -; CHECK-O3: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK-O3: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK-O3: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} ; CHECK-O3: [[META2]] = !{!"Simple C/C++ TBAA"} ;. -; CHECK-LTO: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK-LTO: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK-LTO: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} ; CHECK-LTO: [[META2]] = !{!"Simple C/C++ TBAA"} ;. 
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll index 5386bf939918a..13eed2e918aa0 100644 --- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll +++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes='default' -S %s | FileCheck %s target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64" @@ -12,134 +12,135 @@ target triple = "systemz" ; that transform to produce optimal asm. define dso_local zeroext i32 @foo(ptr noundef %a) #0 { -; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: +; CHECK-LABEL: define dso_local zeroext i32 @foo( +; CHECK-SAME: ptr noundef readnone captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: tail call void @populate(ptr noundef nonnull @ARR) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: br label [[FOR_BODY4:%.*]] -; CHECK: for.body4: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY4]] ] -; CHECK-NEXT: [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ] +; CHECK-NEXT: br label %[[FOR_BODY4:.*]] +; CHECK: [[FOR_BODY4]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY4]] ] +; CHECK-NEXT: [[SUM_11:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD_7:%.*]], %[[FOR_BODY4]] ] ; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i64 0, [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[IDX_NEG]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[INT_TBAA3:![0-9]+]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 
[[TMP0]], [[SUM_11]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_NEG:%.*]] = xor i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: [[ADD_PTR_110:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_NEG]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD_111:%.*]] = add i32 [[TMP1]], [[ADD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_112_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR_217:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_112_NEG]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD_218:%.*]] = add i32 [[TMP2]], [[ADD_111]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_219_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_219_NEG]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[TMP3]], [[ADD_218]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_3_NEG]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[TMP4]], [[ADD_3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR_5:%.*]] = getelementptr 
inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_4_NEG]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD_5:%.*]] = add i32 [[TMP5]], [[ADD_4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_5_NEG]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[TMP6]], [[ADD_5]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_6_NEG]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8 ; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], 32 -; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.body4.1: -; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ], [ 0, [[FOR_BODY4]] ] -; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ], [ [[ADD_7]], [[FOR_BODY4]] ] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_BODY4_1:.*]], label %[[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[FOR_BODY4_1]]: +; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], %[[FOR_BODY4_1]] 
], [ 0, %[[FOR_BODY4]] ] +; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], %[[FOR_BODY4_1]] ], [ [[ADD_7]], %[[FOR_BODY4]] ] ; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[IDX_NEG_1]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_NEG:%.*]] = xor i64 [[INDVARS_IV_1]], -1 ; CHECK-NEXT: [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_NEG]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_1_NEG]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_2_NEG]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_3_NEG:%.*]] = sub nuw nsw i64 -4, 
[[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_3_NEG]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_4_NEG]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_5_NEG]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]] ; CHECK-NEXT: [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_6_NEG]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1 ; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 
[[INDVARS_IV_1]], 8 ; CHECK-NEXT: [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32 -; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]] -; CHECK: for.body4.2: -; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ], [ 0, [[FOR_BODY4_1]] ] -; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ] +; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label %[[FOR_BODY4_2:.*]], label %[[FOR_BODY4_1]], !llvm.loop [[LOOP7]] +; CHECK: [[FOR_BODY4_2]]: +; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], %[[FOR_BODY4_2]] ], [ 0, %[[FOR_BODY4_1]] ] +; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], %[[FOR_BODY4_2]] ], [ [[ADD_1_7]], %[[FOR_BODY4_1]] ] ; CHECK-NEXT: [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[IDX_NEG_2]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP24]], 3 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_NEG:%.*]] = xor i64 [[INDVARS_IV_2]], -1 ; CHECK-NEXT: [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_NEG]] -; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2_1:%.*]] = mul i32 [[TMP25]], 3 ; CHECK-NEXT: [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_2:%.*]] = getelementptr inbounds 
i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_1_NEG]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2_2:%.*]] = mul i32 [[TMP26]], 3 ; CHECK-NEXT: [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_2_NEG]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2_3:%.*]] = mul i32 [[TMP27]], 3 ; CHECK-NEXT: [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_3_NEG]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2_4:%.*]] = mul i32 [[TMP28]], 3 ; CHECK-NEXT: [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_4_NEG]] -; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2_5:%.*]] = mul i32 [[TMP29]], 3 ; CHECK-NEXT: [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]] ; 
CHECK-NEXT: [[INDVARS_IV_NEXT_2_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_5_NEG]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2_6:%.*]] = mul i32 [[TMP30]], 3 ; CHECK-NEXT: [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]] ; CHECK-NEXT: [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_6_NEG]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[INT_TBAA3]] ; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3 ; CHECK-NEXT: [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8 ; CHECK-NEXT: [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32 -; CHECK-NEXT: br i1 [[EXITCOND_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]] -; CHECK: for.inc5.2: +; CHECK-NEXT: br i1 [[EXITCOND_2_NOT_7]], label %[[FOR_INC5_2:.*]], label %[[FOR_BODY4_2]], !llvm.loop [[LOOP7]] +; CHECK: [[FOR_INC5_2]]: ; CHECK-NEXT: ret i32 [[ADD_2_7]] ; entry: @@ -210,3 +211,11 @@ attributes #2 = { argmemonly nocallback nofree nosync nounwind willreturn } !7 = distinct !{!7, !8} !8 = !{!"llvm.loop.mustprogress"} !9 = distinct !{!9, !8} +;. 
+; CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]} +; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"} +;. diff --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll index 7fe3f33430234..f42101ffe89aa 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -O3 -S | FileCheck %s ; RUN: opt < %s -passes="default" -S | FileCheck %s @@ -20,27 +20,28 @@ $_ZNSt14__array_traitsIiLm2EE6_S_refERA2_Kim = comdat any ; Function Attrs: mustprogress nounwind uwtable define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 dereferenceable(8) %arg1) #0 { -; CHECK-LABEL: @foo( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[I9:%.*]] = sdiv i32 [[ARG:%.*]], 128 +; CHECK-LABEL: define dso_local void @foo( +; CHECK-SAME: i32 noundef [[ARG:%.*]], ptr noundef nonnull writeonly align 4 captures(none) dereferenceable(8) [[ARG1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[I9:%.*]] = sdiv i32 [[ARG]], 128 ; CHECK-NEXT: [[I10:%.*]] = shl nsw i32 [[I9]], 7 ; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB12:%.*]], label [[BB13:%.*]] -; CHECK: bb12.loopexit: +; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]] +; CHECK: [[BB12_LOOPEXIT:.*]]: ; CHECK-NEXT: 
[[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64 ; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32 ; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64 ; CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]] -; CHECK-NEXT: br label [[BB12]] -; CHECK: bb12: -; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], [[BB12_LOOPEXIT:%.*]] ], [ 180388626456, [[BB:%.*]] ] -; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1:%.*]], align 4, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ] +; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]] ; CHECK-NEXT: ret void -; CHECK: bb13: -; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], [[BB13]] ], [ 42, [[BB]] ] -; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], [[BB13]] ], [ 24, [[BB]] ] -; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], [[BB13]] ], [ 0, [[BB]] ] +; CHECK: [[BB13]]: +; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, %[[BB]] ] +; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, %[[BB]] ] +; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB]] ] ; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]] ; CHECK-NEXT: [[I24:%.*]] = or disjoint i32 [[I4_05]], 1 ; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]] @@ -50,7 +51,7 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 derefer ; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]] ; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4 ; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]] -; CHECK-NEXT: br i1 [[I11_NOT_3]], label [[BB12_LOOPEXIT]], label [[BB13]], 
!llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]] ; bb: %i = alloca i32, align 4 @@ -166,3 +167,11 @@ attributes #3 = { nounwind } !14 = !{!7, !7, i64 0} !15 = !{!16, !16, i64 0} !16 = !{!"long", !7, i64 0} +;. +; CHECK: [[CHAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +; CHECK: [[META7]] = !{!"Simple C++ TBAA"} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]} +; CHECK: [[META9]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1} +;. diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll index 00453e701ee51..7954ff051a33d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O1 %s ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O2 %s ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O3 %s @@ -14,125 +14,125 @@ target triple = "x86_64-unknown-linux-gnu" $_ZNSt6vectorIiSaIiEEixEm = comdat any define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 dereferenceable(24) %data, i64 noundef %numElems) { -; O1-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy -; O1-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; O1-NEXT: entry: +; O1-LABEL: define dso_local void @_Z7computeRSt6vectorIiSaIiEEy( +; O1-SAME: 
ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O1-NEXT: [[ENTRY:.*]]: ; O1-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 ; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8 -; O1-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] -; O1: for.cond1.preheader: -; O1-NEXT: [[I_06:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; O1-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4:%.*]] -; O1: for.cond.cleanup: +; O1-NEXT: br label %[[FOR_COND1_PREHEADER:.*]] +; O1: [[FOR_COND1_PREHEADER]]: +; O1-NEXT: [[I_06:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_COND_CLEANUP3:.*]] ] +; O1-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4:.*]] +; O1: [[FOR_COND_CLEANUP:.*]]: ; O1-NEXT: ret void -; O1: for.cond.cleanup3: +; O1: [[FOR_COND_CLEANUP3]]: ; O1-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1 ; O1-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100 -; O1-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]] -; O1: for.body4: -; O1-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ 0, [[FOR_COND1_PREHEADER]] ] +; O1-NEXT: br i1 [[EXITCOND7_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]] +; O1: [[FOR_BODY4]]: +; O1-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], %[[FOR_BODY4]] ], [ 0, %[[FOR_COND1_PREHEADER]] ] ; O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]] -; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA2:![0-9]+]] +; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] ; O1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -; O1-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA2]] +; O1-NEXT: store i32 [[INC]], ptr 
[[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA2]] ; O1-NEXT: [[INC5]] = add nuw i64 [[J_05]], 1 ; O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]] -; O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP6:![0-9]+]] +; O1-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]], !llvm.loop [[LOOP6:![0-9]+]] ; -; O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy -; O2-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; O2-NEXT: entry: +; O2-LABEL: define dso_local void @_Z7computeRSt6vectorIiSaIiEEy( +; O2-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O2-NEXT: [[ENTRY:.*]]: ; O2-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 ; O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8 ; O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 ; O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8 ; O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUMELEMS]], [[N_VEC]] -; O2-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] -; O2: for.cond1.preheader: -; O2-NEXT: [[I_06:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; O2-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] -; O2: for.body4.preheader: -; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER9:%.*]], label [[VECTOR_BODY:%.*]] -; O2: vector.body: -; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY4_PREHEADER]] ] +; O2-NEXT: br label %[[FOR_COND1_PREHEADER:.*]] +; O2: [[FOR_COND1_PREHEADER]]: +; O2-NEXT: [[I_06:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_COND_CLEANUP3:.*]] ] +; O2-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP3]], label 
%[[FOR_BODY4_PREHEADER:.*]] +; O2: [[FOR_BODY4_PREHEADER]]: +; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY4_PREHEADER9:.*]], label %[[VECTOR_BODY:.*]] +; O2: [[VECTOR_BODY]]: +; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[FOR_BODY4_PREHEADER]] ] ; O2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]] ; O2-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 -; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; O2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] +; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] +; O2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]] ; O2-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1) -; O2-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] -; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] +; O2-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]] +; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]] ; O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; O2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; O2: middle.block: -; O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER9]] -; O2: for.body4.preheader9: -; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; O2-NEXT: br label [[FOR_BODY4:%.*]] -; O2: for.cond.cleanup: +; O2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; O2: [[MIDDLE_BLOCK]]: +; O2-NEXT: br i1 [[CMP_N]], label 
%[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4_PREHEADER9]] +; O2: [[FOR_BODY4_PREHEADER9]]: +; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] +; O2-NEXT: br label %[[FOR_BODY4:.*]] +; O2: [[FOR_COND_CLEANUP:.*]]: ; O2-NEXT: ret void -; O2: for.cond.cleanup3: +; O2: [[FOR_COND_CLEANUP3]]: ; O2-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1 ; O2-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100 -; O2-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]] -; O2: for.body4: -; O2-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ [[J_05_PH]], [[FOR_BODY4_PREHEADER9]] ] +; O2-NEXT: br i1 [[EXITCOND7_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]] +; O2: [[FOR_BODY4]]: +; O2-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], %[[FOR_BODY4]] ], [ [[J_05_PH]], %[[FOR_BODY4_PREHEADER9]] ] ; O2-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]] -; O2-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]] +; O2-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA0]] ; O2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 -; O2-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]] +; O2-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA0]] ; O2-NEXT: [[INC5]] = add nuw i64 [[J_05]], 1 ; O2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]] -; O2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP9:![0-9]+]] +; O2-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]], !llvm.loop [[LOOP9:![0-9]+]] ; -; O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy -; O3-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] 
{ -; O3-NEXT: entry: +; O3-LABEL: define dso_local void @_Z7computeRSt6vectorIiSaIiEEy( +; O3-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O3-NEXT: [[ENTRY:.*:]] ; O3-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 ; O3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8 -; O3-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] -; O3: for.cond1.preheader.us.preheader: +; O3-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_COND1_PREHEADER_US_PREHEADER:.*]] +; O3: [[FOR_COND1_PREHEADER_US_PREHEADER]]: ; O3-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 ; O3-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8 ; O3-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUMELEMS]], [[N_VEC]] -; O3-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] -; O3: for.cond1.preheader.us: -; O3-NEXT: [[I_06_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] -; O3: vector.body: -; O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] +; O3-NEXT: br label %[[FOR_COND1_PREHEADER_US:.*]] +; O3: [[FOR_COND1_PREHEADER_US]]: +; O3-NEXT: [[I_06_US:%.*]] = phi i64 [ [[INC7_US:%.*]], %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:.*]] ], [ 0, %[[FOR_COND1_PREHEADER_US_PREHEADER]] ] +; O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY4_US_PREHEADER:.*]], label %[[VECTOR_BODY:.*]] +; O3: [[VECTOR_BODY]]: +; O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[FOR_COND1_PREHEADER_US]] ] ; O3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]] ; O3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 
-; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; O3-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] +; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] +; O3-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]] ; O3-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], splat (i32 1) -; O3-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] -; O3-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] +; O3-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0]] +; O3-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]] ; O3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; O3-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; O3-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; O3: middle.block: -; O3-NEXT: br i1 [[CMP_N]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US_PREHEADER]] -; O3: for.body4.us.preheader: -; O3-NEXT: [[J_05_US_PH:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; O3-NEXT: br label [[FOR_BODY4_US:%.*]] -; O3: for.body4.us: -; O3-NEXT: [[J_05_US:%.*]] = phi i64 [ [[INC5_US:%.*]], [[FOR_BODY4_US]] ], [ [[J_05_US_PH]], [[FOR_BODY4_US_PREHEADER]] ] +; O3-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; O3: [[MIDDLE_BLOCK]]: +; O3-NEXT: br i1 [[CMP_N]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label %[[FOR_BODY4_US_PREHEADER]] +; O3: [[FOR_BODY4_US_PREHEADER]]: +; O3-NEXT: [[J_05_US_PH:%.*]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER_US]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] +; O3-NEXT: br label %[[FOR_BODY4_US:.*]] +; O3: [[FOR_BODY4_US]]: +; O3-NEXT: 
[[J_05_US:%.*]] = phi i64 [ [[INC5_US:%.*]], %[[FOR_BODY4_US]] ], [ [[J_05_US_PH]], %[[FOR_BODY4_US_PREHEADER]] ] ; O3-NEXT: [[ADD_PTR_I_US:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05_US]] -; O3-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I_US]], align 4, !tbaa [[TBAA0]] +; O3-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I_US]], align 4, !tbaa [[INT_TBAA0]] ; O3-NEXT: [[INC_US:%.*]] = add nsw i32 [[TMP6]], 1 -; O3-NEXT: store i32 [[INC_US]], ptr [[ADD_PTR_I_US]], align 4, !tbaa [[TBAA0]] +; O3-NEXT: store i32 [[INC_US]], ptr [[ADD_PTR_I_US]], align 4, !tbaa [[INT_TBAA0]] ; O3-NEXT: [[INC5_US]] = add nuw i64 [[J_05_US]], 1 ; O3-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5_US]], [[NUMELEMS]] -; O3-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]] -; O3: for.cond1.for.cond.cleanup3_crit_edge.us: +; O3-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label %[[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]] +; O3: [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]: ; O3-NEXT: [[INC7_US]] = add nuw nsw i64 [[I_06_US]], 1 ; O3-NEXT: [[EXITCOND8_NOT:%.*]] = icmp eq i64 [[INC7_US]], 100 -; O3-NEXT: br i1 [[EXITCOND8_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]] -; O3: for.cond.cleanup: +; O3-NEXT: br i1 [[EXITCOND8_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]] +; O3: [[FOR_COND_CLEANUP]]: ; O3-NEXT: ret void ; entry: @@ -237,3 +237,34 @@ declare void @llvm.lifetime.end.p0(ptr nocapture) !15 = !{!"long", !5, i64 0} !16 = !{!17, !4, i64 0} !17 = !{!"_ZTSNSt12_Vector_baseIiSaIiEE17_Vector_impl_dataE", !4, i64 0, !4, i64 8, !4, i64 16} +;. 
+; O1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} +; O1: [[META1]] = !{!"llvm.loop.mustprogress"} +; O1: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +; O1: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +; O1: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +; O1: [[META5]] = !{!"Simple C++ TBAA"} +; O1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]} +;. +; O2: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; O2: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; O2: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; O2: [[META3]] = !{!"Simple C++ TBAA"} +; O2: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} +; O2: [[META5]] = !{!"llvm.loop.mustprogress"} +; O2: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} +; O2: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} +; O2: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]} +; O2: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]], [[META7]], [[META6]]} +;. +; O3: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; O3: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; O3: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; O3: [[META3]] = !{!"Simple C++ TBAA"} +; O3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} +; O3: [[META5]] = !{!"llvm.loop.mustprogress"} +; O3: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} +; O3: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} +; O3: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]], [[META7]], [[META6]]} +; O3: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]} +;. 
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll index cb378465e30ec..ac736518c0cbd 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes="default" -S %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" @@ -23,18 +23,18 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[INT_TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]] -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> 
[[TMP5]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]] ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -49,18 +49,18 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
%[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64 ; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]] ; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64 ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]] -; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]] -; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]] ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]] -; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]] +; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: 
[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -190,12 +190,12 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !18 = !{!"llvm.loop.vectorize.enable", i1 true} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} ; CHECK: [[ACC_GRP4]] = distinct !{} -; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[DOUBLE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} ; CHECK: [[META6]] = !{!"double", [[META2]], i64 0} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]} ; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll index ec387d6ae44f2..fcdb68353311d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O1 %s ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O23 %s ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O23 %s @@ -9,57 +9,59 @@ target triple = "x86_64-unknown-linux-gnu" ; We should retain the TBAA on the load here, not lose it. 
define void @licm(ptr align 8 dereferenceable(8) %_M_start.i, i64 %numElem) { -; O1-LABEL: @licm( -; O1-NEXT: entry: -; O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] -; O1: for.body.lr.ph: -; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] -; O1-NEXT: br label [[FOR_BODY:%.*]] -; O1: for.body: -; O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; O1-LABEL: define void @licm( +; O1-SAME: ptr readonly align 8 captures(none) dereferenceable(8) [[_M_START_I:%.*]], i64 [[NUMELEM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O1-NEXT: [[ENTRY:.*:]] +; O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM]], 0 +; O1-NEXT: br i1 [[CMP1_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_LR_PH:.*]] +; O1: [[FOR_BODY_LR_PH]]: +; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I]], align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]] +; O1-NEXT: br label %[[FOR_BODY:.*]] +; O1: [[FOR_BODY]]: +; O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] ; O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[K_02]] -; O1-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; O1-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA8:![0-9]+]] ; O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] -; O1: for.cond.cleanup: +; O1-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; O1: [[FOR_COND_CLEANUP]]: ; O1-NEXT: ret void ; -; O23-LABEL: @licm( -; O23-NEXT: entry: -; O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label 
[[FOR_BODY_LR_PH:%.*]] -; O23: for.body.lr.ph: -; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] +; O23-LABEL: define void @licm( +; O23-SAME: ptr readonly align 8 captures(none) dereferenceable(8) [[_M_START_I:%.*]], i64 [[NUMELEM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O23-NEXT: [[ENTRY:.*:]] +; O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM]], 0 +; O23-NEXT: br i1 [[CMP1_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_LR_PH:.*]] +; O23: [[FOR_BODY_LR_PH]]: +; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_START_I]], align 8, !tbaa [[ANYPTR_TBAA3:![0-9]+]] ; O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4 -; O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] -; O23: vector.ph: +; O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_PH:.*]] +; O23: [[VECTOR_PH]]: ; O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4 -; O23-NEXT: br label [[VECTOR_BODY:%.*]] -; O23: vector.body: -; O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; O23-NEXT: br label %[[VECTOR_BODY:.*]] +; O23: [[VECTOR_BODY]]: +; O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[INDEX]] ; O23-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 -; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]] -; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[TBAA8]] +; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[DOUBLE_TBAA8:![0-9]+]] +; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[DOUBLE_TBAA8]] ; O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; O23-NEXT: [[TMP3:%.*]] = icmp eq 
i64 [[INDEX_NEXT]], [[N_VEC]] -; O23-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; O23: middle.block: +; O23-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; O23: [[MIDDLE_BLOCK]]: ; O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUMELEM]], [[N_VEC]] -; O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]] -; O23: for.body.preheader: -; O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; O23-NEXT: br label [[FOR_BODY:%.*]] -; O23: for.body: -; O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ] +; O23-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER]] +; O23: [[FOR_BODY_PREHEADER]]: +; O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] +; O23-NEXT: br label %[[FOR_BODY:.*]] +; O23: [[FOR_BODY]]: +; O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[K_02_PH]], %[[FOR_BODY_PREHEADER]] ] ; O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[K_02]] -; O23-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]] +; O23-NEXT: store double 2.000000e+00, ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA8]] ; O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] -; O23: for.cond.cleanup: +; O23-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; O23: [[FOR_COND_CLEANUP]]: ; O23-NEXT: ret void ; entry: @@ -94,3 +96,24 @@ for.cond.cleanup: ; preds = %for.cond !7 = !{!"Simple C++ TBAA"} !8 = !{!9, !9, i64 0} !9 = !{!"double", !6, i64 0} +;. 
+; O1: [[ANYPTR_TBAA3]] = !{[[META4:![0-9]+]], [[META5:![0-9]+]], i64 0} +; O1: [[META4]] = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", [[META5]], i64 0, [[META5]], i64 8, [[META5]], i64 16} +; O1: [[META5]] = !{!"any pointer", [[META6:![0-9]+]], i64 0} +; O1: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +; O1: [[META7]] = !{!"Simple C++ TBAA"} +; O1: [[DOUBLE_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; O1: [[META9]] = !{!"double", [[META6]], i64 0} +;. +; O23: [[ANYPTR_TBAA3]] = !{[[META4:![0-9]+]], [[META5:![0-9]+]], i64 0} +; O23: [[META4]] = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", [[META5]], i64 0, [[META5]], i64 8, [[META5]], i64 16} +; O23: [[META5]] = !{!"any pointer", [[META6:![0-9]+]], i64 0} +; O23: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} +; O23: [[META7]] = !{!"Simple C++ TBAA"} +; O23: [[DOUBLE_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; O23: [[META9]] = !{!"double", [[META6]], i64 0} +; O23: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]], [[META12:![0-9]+]]} +; O23: [[META11]] = !{!"llvm.loop.isvectorized", i32 1} +; O23: [[META12]] = !{!"llvm.loop.unroll.runtime.disable"} +; O23: [[LOOP13]] = distinct !{[[LOOP13]], [[META12]], [[META11]]} +;. 
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll index 438a93c735796..574132c18d263 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O1 %s ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O23 %s ; RUN: opt -passes="default" -S < %s | FileCheck --check-prefixes=O23 %s @@ -13,65 +13,65 @@ target triple = "x86_64-unknown-linux-gnu" $_ZN12FloatVecPair6vecIncEv = comdat any define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(ptr %FVP) { -; O1-LABEL: define {{[^@]+}}@_Z13vecIncFromPtrP12FloatVecPair -; O1-SAME: (ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; O1-NEXT: entry: +; O1-LABEL: define dso_local void @_Z13vecIncFromPtrP12FloatVecPair( +; O1-SAME: ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O1-NEXT: [[ENTRY:.*:]] ; O1-NEXT: [[VSRC23_I:%.*]] = getelementptr inbounds nuw i8, ptr [[FVP]], i64 16 -; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]] +; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]] ; O1-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef ; O1-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_I_I]], i64 8 -; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[INT_TBAA6:![0-9]+]] ; O1-NEXT: [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; O1-NEXT: br i1 
[[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] -; O1: for.body7.lr.ph.i: -; O1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; O1-NEXT: br i1 [[CMP56_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT:.*]], label %[[FOR_BODY7_LR_PH_I:.*]] +; O1: [[FOR_BODY7_LR_PH_I]]: +; O1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]] ; O1-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef -; O1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]] +; O1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[ANYPTR_TBAA0]] ; O1-NEXT: [[ARRAYIDX_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP3]], i64 undef -; O1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[TBAA8]] +; O1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[ANYPTR_TBAA8]] ; O1-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 undef -; O1-NEXT: br label [[FOR_BODY7_I:%.*]] -; O1: for.body7.i: -; O1-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; O1-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9:![0-9]+]] -; O1-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]] +; O1-NEXT: br label %[[FOR_BODY7_I:.*]] +; O1: [[FOR_BODY7_I]]: +; O1-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, %[[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], %[[FOR_BODY7_I]] ] +; O1-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[FLOAT_TBAA9:![0-9]+]] +; O1-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[FLOAT_TBAA9]] ; O1-NEXT: [[ADD_I:%.*]] = fadd float [[TMP5]], [[TMP6]] -; O1-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]] +; O1-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa 
[[FLOAT_TBAA9]] ; O1-NEXT: [[INC_I]] = add nuw i32 [[J_07_I]], 1 ; O1-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] -; O1-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] -; O1: _ZN12FloatVecPair6vecIncEv.exit: +; O1-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label %[[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] +; O1: [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]]: ; O1-NEXT: ret void ; -; O23-LABEL: define {{[^@]+}}@_Z13vecIncFromPtrP12FloatVecPair -; O23-SAME: (ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; O23-NEXT: entry: +; O23-LABEL: define dso_local void @_Z13vecIncFromPtrP12FloatVecPair( +; O23-SAME: ptr readonly captures(none) [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O23-NEXT: [[ENTRY:.*:]] ; O23-NEXT: [[VSRC23_I:%.*]] = getelementptr inbounds nuw i8, ptr [[FVP]], i64 16 -; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]] +; O23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]] ; O23-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef ; O23-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_I_I]], i64 8 -; O23-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; O23-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[INT_TBAA6:![0-9]+]] ; O23-NEXT: [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; O23-NEXT: br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] -; O23: for.body7.lr.ph.i: -; O23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; O23-NEXT: br i1 [[CMP56_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT:.*]], label %[[FOR_BODY7_LR_PH_I:.*]] +; O23: [[FOR_BODY7_LR_PH_I]]: +; O23-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]] ; O23-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef -; O23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]] +; O23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[ANYPTR_TBAA0]] ; O23-NEXT: [[ARRAYIDX_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP3]], i64 undef -; O23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[TBAA8]] +; O23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[ANYPTR_TBAA8]] ; O23-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 undef -; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9:![0-9]+]] -; O23-NEXT: br label [[FOR_BODY7_I:%.*]] -; O23: for.body7.i: -; O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ] -; O23-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[FLOAT_TBAA9:![0-9]+]] +; O23-NEXT: br label %[[FOR_BODY7_I:.*]] +; O23: [[FOR_BODY7_I]]: +; O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], %[[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], %[[FOR_BODY7_I]] ] +; O23-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, %[[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], %[[FOR_BODY7_I]] ] +; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[FLOAT_TBAA9]] ; O23-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]] -; O23-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]] +; O23-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[FLOAT_TBAA9]] ; O23-NEXT: [[INC_I]] = add nuw i32 [[J_07_I]], 1 ; O23-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] 
-; O23-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] -; O23: _ZN12FloatVecPair6vecIncEv.exit: +; O23-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label %[[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] +; O23: [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]]: ; O23-NEXT: ret void ; entry: @@ -163,3 +163,32 @@ entry: !12 = !{!13, !1, i64 0} !13 = !{!"_ZTS14HomemadeVectorIS_IfLj8EELj8EE", !1, i64 0, !5, i64 8} !14 = !{!7, !1, i64 0} +;. +; O1: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} +; O1: [[META1]] = !{!"_ZTS14HomemadeVectorIS_IfLj8EELj8EE", [[META2]], i64 0, [[META5:![0-9]+]], i64 8} +; O1: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0} +; O1: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; O1: [[META4]] = !{!"Simple C++ TBAA"} +; O1: [[META5]] = !{!"int", [[META3]], i64 0} +; O1: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META5]], i64 8} +; O1: [[META7]] = !{!"_ZTS14HomemadeVectorIfLj8EE", [[META2]], i64 0, [[META5]], i64 8} +; O1: [[ANYPTR_TBAA8]] = !{[[META7]], [[META2]], i64 0} +; O1: [[FLOAT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +; O1: [[META10]] = !{!"float", [[META3]], i64 0} +; O1: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]} +; O1: [[META12]] = !{!"llvm.loop.mustprogress"} +;. 
+; O23: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} +; O23: [[META1]] = !{!"_ZTS14HomemadeVectorIS_IfLj8EELj8EE", [[META2]], i64 0, [[META5:![0-9]+]], i64 8} +; O23: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0} +; O23: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; O23: [[META4]] = !{!"Simple C++ TBAA"} +; O23: [[META5]] = !{!"int", [[META3]], i64 0} +; O23: [[INT_TBAA6]] = !{[[META7:![0-9]+]], [[META5]], i64 8} +; O23: [[META7]] = !{!"_ZTS14HomemadeVectorIfLj8EE", [[META2]], i64 0, [[META5]], i64 8} +; O23: [[ANYPTR_TBAA8]] = !{[[META7]], [[META2]], i64 0} +; O23: [[FLOAT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +; O23: [[META10]] = !{!"float", [[META3]], i64 0} +; O23: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]} +; O23: [[META12]] = !{!"llvm.loop.mustprogress"} +;. diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll index 69a46b26decb2..ae6f4a7b76ab8 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -O3 -S | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -15,22 +15,23 @@ target triple = "x86_64-apple-macosx11.0.0" ; } define void @vdiv(ptr %a, float %b) #0 { -; CHECK-LABEL: @vdiv( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +; CHECK-LABEL: define void @vdiv( +; CHECK-SAME: ptr captures(none) [[A:%.*]], float [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B]], i64 0 ; CHECK-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[BROADCAST_SPLAT]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[FLOAT_TBAA3:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], [[TMP0]] -; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[FLOAT_TBAA3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br i1 [[TMP5]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; entry: @@ -97,3 +98,14 @@ attributes #2 = { nounwind } !11 = distinct !{!11, !12, !13} !12 = !{!"llvm.loop.mustprogress"} !13 = !{!"llvm.loop.unroll.disable"} +;. 
+; CHECK: [[FLOAT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"float", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]} +; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[META9]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"} +;. diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 7817c23e6a3ec..f7bc01e0e8af1 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -O3 -S | FileCheck %s ; RUN: opt < %s -passes="default" -S | FileCheck %s @@ -12,41 +12,42 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 target triple = "x86_64-apple-macosx10.15.0" define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { -; CHECK-LABEL: @vdiv( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: iter.check: -; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X:%.*]] to i64 -; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y:%.*]] to i64 +; CHECK-LABEL: define void @vdiv( +; CHECK-SAME: ptr writeonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], double [[A:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label 
%[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X]] to i64 +; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y]] to i64 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER9:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER9:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i32 [[N]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] -; CHECK: vector.ph: +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]] +; CHECK: [[VECTOR_PH1]]: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] 
= phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 64 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 96 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP8]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[DOUBLE_TBAA3:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[DOUBLE_TBAA3]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[DOUBLE_TBAA3]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP8]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP3]] @@ -55,67 +56,67 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 32 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 64 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 96 -; CHECK-NEXT: store <4 x double> [[TMP9]], ptr [[TMP13]], align 
8, !tbaa [[TBAA3]] -; CHECK-NEXT: store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: store <4 x double> [[TMP12]], ptr [[TMP16]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store <4 x double> [[TMP9]], ptr [[TMP13]], align 8, !tbaa [[DOUBLE_TBAA3]] +; CHECK-NEXT: store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[DOUBLE_TBAA3]] +; CHECK-NEXT: store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[DOUBLE_TBAA3]] +; CHECK-NEXT: store <4 x double> [[TMP12]], ptr [[TMP16]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: middle.block: +; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[FOR_BODY_PREHEADER9]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER9]], label %[[VEC_EPILOG_PH]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, 
%[[VECTOR_PH]] ] ; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT14]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT15]] -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX12]] -; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x double>, ptr [[TMP39]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x double>, ptr [[TMP39]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP40:%.*]] = fmul fast <4 x double> [[WIDE_LOAD13]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX12]] -; CHECK-NEXT: store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 4 ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC11]] -; CHECK-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: br i1 [[TMP42]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; 
CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[N_VEC11]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[CMP_N17]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br i1 [[CMP_N17]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9]] +; CHECK: [[FOR_BODY_PREHEADER9]]: +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP43]], 7 ; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]] -; CHECK: for.body.prol.preheader: +; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]], label %[[FOR_BODY_PROL_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PROL_PREHEADER]]: ; CHECK-NEXT: [[TMP18:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]] -; CHECK: for.body.prol: -; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ] -; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_BODY_PROL:.*]] +; CHECK: [[FOR_BODY_PROL]]: +; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY_PROL]] ], [ 0, %[[FOR_BODY_PROL_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = 
getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_PROL]] -; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP19:%.*]] = fmul fast double [[T0_PROL]], [[TMP18]] ; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_PROL]] -; CHECK-NEXT: store double [[TMP19]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP19]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]] -; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: for.body.prol.loopexit: -; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]: +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ] ; CHECK-NEXT: [[TMP20:%.*]] = sub nsw i64 [[INDVARS_IV_PH]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[TMP20]], -8 -; CHECK-NEXT: br i1 [[TMP21]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9_NEW:%.*]] -; CHECK: for.body.preheader.new: +; CHECK-NEXT: br i1 [[TMP21]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9_NEW:.*]] +; CHECK: [[FOR_BODY_PREHEADER9_NEW]]: ; CHECK-NEXT: [[TMP22:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: [[TMP23:%.*]] = fdiv fast double 
1.000000e+00, [[A]] ; CHECK-NEXT: [[TMP24:%.*]] = fdiv fast double 1.000000e+00, [[A]] @@ -124,60 +125,60 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: [[TMP28:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP30:%.*]] = fmul fast double [[T0]], [[TMP22]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store double [[TMP30]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP30]], ptr [[ARRAYIDX2]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP31:%.*]] = fmul fast double [[T0_1]], [[TMP23]] ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: store double [[TMP31]], ptr [[ARRAYIDX2_1]], align 8, !tbaa 
[[TBAA3]] +; CHECK-NEXT: store double [[TMP31]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]] -; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP32:%.*]] = fmul fast double [[T0_2]], [[TMP24]] ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]] -; CHECK-NEXT: store double [[TMP32]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP32]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP33:%.*]] = fmul fast double [[T0_3]], [[TMP25]] ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: store double [[TMP33]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP33]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]] -; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0_4]], [[TMP26]] ; CHECK-NEXT: 
[[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]] -; CHECK-NEXT: store double [[TMP34]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP34]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]] -; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast double [[T0_5]], [[TMP27]] ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]] -; CHECK-NEXT: store double [[TMP35]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP35]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]] -; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP36:%.*]] = fmul fast double [[T0_6]], [[TMP28]] ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]] -; CHECK-NEXT: store double [[TMP36]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP36]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]] -; CHECK-NEXT: [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: 
[[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast double [[T0_7]], [[TMP29]] ; CHECK-NEXT: [[ARRAYIDX2_7:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]] -; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[DOUBLE_TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8 ; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: for.end: +; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; entry: @@ -223,3 +224,16 @@ attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"=" !5 = !{!"omnipotent char", !6, i64 0} !6 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[DOUBLE_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"double", [[META5:![0-9]+]], i64 0} +; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} +; CHECK: [[META8]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META9]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]} +; CHECK: [[META12]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META8]]} +;. 
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll index ae0e59169d3e5..5253c42d9c6d2 100644 --- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes='default' -S %s | FileCheck %s ; Slightly reduced test case for a loop iterating over a std::span with libc++ hardening. @@ -18,30 +18,30 @@ %"struct.std::__1::__bounded_iter" = type { ptr, ptr, ptr } define void @test_fill_with_foreach([2 x i64] %elems.coerce) { -; CHECK-LABEL: define void @test_fill_with_foreach -; CHECK-SAME: ([2 x i64] [[ELEMS_COERCE:%.*]]) local_unnamed_addr { -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @test_fill_with_foreach( +; CHECK-SAME: [2 x i64] [[ELEMS_COERCE:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ELEMS_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ELEMS_COERCE_FCA_0_EXTRACT]] to ptr ; CHECK-NEXT: [[ELEMS_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 1 ; CHECK-NEXT: [[ADD_PTR_I_IDX:%.*]] = shl nsw i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 2 ; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[ADD_PTR_I_IDX]] ; CHECK-NEXT: [[CMP_NOT_I_I_I_I:%.*]] = icmp slt i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT_I_I_I_I]], label [[ERROR:%.*]], label [[FOR_COND_PREHEADER_SPLIT:%.*]] -; CHECK: for.cond.preheader.split: +; CHECK-NEXT: br i1 [[CMP_NOT_I_I_I_I]], label %[[ERROR:.*]], label %[[FOR_COND_PREHEADER_SPLIT:.*]] +; CHECK: [[FOR_COND_PREHEADER_SPLIT]]: ; CHECK-NEXT: [[CMP_I_NOT2:%.*]] = icmp eq i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 0 -; CHECK-NEXT: br 
i1 [[CMP_I_NOT2]], label [[COMMON_RET:%.*]], label [[FOR_BODY:%.*]] -; CHECK: common.ret: +; CHECK-NEXT: br i1 [[CMP_I_NOT2]], label %[[COMMON_RET:.*]], label %[[FOR_BODY:.*]] +; CHECK: [[COMMON_RET]]: ; CHECK-NEXT: ret void -; CHECK: error: +; CHECK: [[ERROR]]: ; CHECK-NEXT: tail call void @error() -; CHECK-NEXT: br label [[COMMON_RET]] -; CHECK: for.body: -; CHECK-NEXT: [[__BEGIN1_SROA_0_03:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_COND_PREHEADER_SPLIT]] ] +; CHECK-NEXT: br label %[[COMMON_RET]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[__BEGIN1_SROA_0_03:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], %[[FOR_BODY]] ], [ [[TMP0]], %[[FOR_COND_PREHEADER_SPLIT]] ] ; CHECK-NEXT: tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_03]]) ; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__BEGIN1_SROA_0_03]], i64 4 ; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR_I]], [[ADD_PTR_I]] -; CHECK-NEXT: br i1 [[CMP_I_NOT]], label [[COMMON_RET]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[CMP_I_NOT]], label %[[COMMON_RET]], label %[[FOR_BODY]] ; entry: %elems = alloca %"class.std::__1::span", align 8 @@ -131,29 +131,29 @@ declare void @llvm.lifetime.end.p0(ptr nocapture) %Vector_impl_data = type { ptr, ptr, ptr } define void @foo(ptr noundef nonnull align 8 dereferenceable(24) noalias %vec) #0 { -; CHECK-LABEL: define void @foo -; CHECK-SAME: (ptr noalias noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ptr noalias noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[_M_FINISH_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8 -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]] -; 
CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[ANYPTR_TBAA5:![0-9]+]] ; CHECK-NEXT: [[SUB_PTR_LHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64 ; CHECK-NEXT: [[SUB_PTR_RHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; CHECK-NEXT: [[SUB_PTR_SUB_I_I:%.*]] = sub i64 [[SUB_PTR_LHS_CAST_I_I]], [[SUB_PTR_RHS_CAST_I_I]] ; CHECK-NEXT: [[SUB_PTR_DIV_I_I:%.*]] = ashr exact i64 [[SUB_PTR_SUB_I_I]], 3 ; CHECK-NEXT: [[CMP_NOT9:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP_NOT9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br i1 [[CMP_NOT9]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 [[I_010]] ; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ADD_PTR_I]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00 ; CHECK-NEXT: store double [[ADD]], ptr [[ADD_PTR_I]], align 8 ; CHECK-NEXT: [[INC]] = add nuw i64 [[I_010]], 1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], [[SUB_PTR_DIV_I_I]] -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; entry: %vec.addr = alloca ptr, align 8 @@ -270,29 +270,29 @@ declare void @abort() ; https://github.com/llvm/llvm-project/issues/63126 define void @loop_with_signed_induction(ptr noundef nonnull align 8 dereferenceable(24) %vec) { -; CHECK-LABEL: define void 
@loop_with_signed_induction -; CHECK-SAME: (ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @loop_with_signed_induction( +; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[_M_FINISH_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8 -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[TBAA5]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[ANYPTR_TBAA0]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[ANYPTR_TBAA5]] ; CHECK-NEXT: [[SUB_PTR_LHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64 ; CHECK-NEXT: [[SUB_PTR_RHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; CHECK-NEXT: [[SUB_PTR_SUB_I_I:%.*]] = sub i64 [[SUB_PTR_LHS_CAST_I_I]], [[SUB_PTR_RHS_CAST_I_I]] ; CHECK-NEXT: [[SUB_PTR_DIV_I_I:%.*]] = ashr exact i64 [[SUB_PTR_SUB_I_I]], 3 ; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i64 [[SUB_PTR_DIV_I_I]], 0 -; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i64 [[I_010]] -; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA6:![0-9]+]] ; 
CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00 -; CHECK-NEXT: store double [[ADD]], ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA6]] +; CHECK-NEXT: store double [[ADD]], ptr [[ADD_PTR_I]], align 8, !tbaa [[DOUBLE_TBAA6]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_010]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[SUB_PTR_DIV_I_I]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] ; entry: %vec.addr = alloca ptr, align 8 @@ -343,22 +343,22 @@ for.end: define void @monkey(ptr noundef %arr, i32 noundef %len) { -; CHECK-LABEL: define void @monkey -; CHECK-SAME: (ptr noundef captures(none) [[ARR:%.*]], i32 noundef [[LEN:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @monkey( +; CHECK-SAME: ptr noundef captures(none) [[ARR:%.*]], i32 noundef [[LEN:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN]], 1 -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY4_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body4.preheader: -; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY4:%.*]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY4_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY4_PREHEADER]]: +; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_COND_CLEANUP3:.*]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY4:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void -; CHECK: for.cond.cleanup3: +; CHECK: [[FOR_COND_CLEANUP3]]: ; CHECK-NEXT: [[INC]] = add nuw i32 [[I_09]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[LEN]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY4_PREHEADER]], label [[FOR_COND_CLEANUP]] -; CHECK: for.body4: -; CHECK-NEXT: [[K_07:%.*]] = phi i32 
[ [[DEC:%.*]], [[FOR_BODY4]] ], [ [[I_09]], [[FOR_BODY4_PREHEADER]] ] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY4_PREHEADER]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_BODY4]]: +; CHECK-NEXT: [[K_07:%.*]] = phi i32 [ [[DEC:%.*]], %[[FOR_BODY4]] ], [ [[I_09]], %[[FOR_BODY4_PREHEADER]] ] ; CHECK-NEXT: [[IDX_EXT_I:%.*]] = zext i32 [[K_07]] to i64 ; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDX_EXT_I]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4 @@ -366,7 +366,7 @@ define void @monkey(ptr noundef %arr, i32 noundef %len) { ; CHECK-NEXT: store i32 [[ADD]], ptr [[ADD_PTR_I]], align 4 ; CHECK-NEXT: [[DEC]] = add i32 [[K_07]], -1 ; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]] +; CHECK-NEXT: br i1 [[CMP2_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]] ; entry: %arr.addr = alloca ptr, align 8 @@ -472,3 +472,13 @@ if.end: ; preds = %entry !7 = !{!1, !2, i64 8} !8 = !{!9, !9, i64 0} !9 = !{!"double", !3, i64 0} +;. +; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 8} +; CHECK: [[META1]] = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", [[META2]], i64 0, [[META2]], i64 8, [[META2]], i64 16} +; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[ANYPTR_TBAA5]] = !{[[META1]], [[META2]], i64 0} +; CHECK: [[DOUBLE_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +; CHECK: [[META7]] = !{!"double", [[META3]], i64 0} +;. 
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll index aaca5a6c87b4f..1a1fe20350885 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s ;. @@ -131,7 +131,7 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind { define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind { ; CHECK-LABEL: define void @memset_pattern_i64_128_tbaa( ; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[DOUBLE_TBAA0:![0-9]+]] ; CHECK-NEXT: ret void ; tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0x400921fb54442d18, i64 128, i1 false), !tbaa !5 @@ -216,7 +216,7 @@ define void @memset_pattern_i64_x_fromnonconstptr(ptr %a, i64 %x, ptr %p) nounwi ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. 
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll index 5d91e03559dea..bfa18f88a2467 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/32-bit.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=slp-vectorizer -S < %s | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -7,13 +7,13 @@ target triple = "aarch64-unknown-linux-gnu" %S = type { i8, i8, i8, i8 } define ptr @foo(ptr %this, ptr %rhs) { -; CHECK-LABEL: define ptr @foo -; CHECK-SAME: (ptr [[THIS:%.*]], ptr [[RHS:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[RHS]], align 1, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[THIS]], align 1, !tbaa [[TBAA0]] +; CHECK-LABEL: define ptr @foo( +; CHECK-SAME: ptr [[THIS:%.*]], ptr [[RHS:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[RHS]], align 1, !tbaa [[BOOL_TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[THIS]], align 1, !tbaa [[BOOL_TBAA0]] ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[TMP0]], [[TMP1]] -; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[THIS]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[THIS]], align 1, !tbaa [[BOOL_TBAA0]] ; CHECK-NEXT: ret ptr [[THIS]] ; entry: @@ -54,3 +54,9 @@ entry: !14 = !{!7, !8, i64 2} !15 = !{!7, !8, i64 3} +;. 
+; CHECK: [[BOOL_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"bool", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +;. diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll index 9cb2badc25fb2..76b1d18fdc0a8 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; Debug informations shouldn't affect spill cost. ; RUN: opt -S -passes=slp-vectorizer %s -o - | FileCheck %s @@ -7,17 +7,18 @@ target triple = "aarch64" %struct.S = type { i64, i64 } define void @patatino(i64 %n, i64 %i, ptr %p) !dbg !7 { -; CHECK-LABEL: @patatino( -; CHECK-NEXT: entry: -; CHECK-NEXT: #dbg_value(i64 [[N:%.*]], [[META18:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) -; CHECK-NEXT: #dbg_value(i64 [[I:%.*]], [[META19:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) -; CHECK-NEXT: #dbg_value(ptr [[P:%.*]], [[META20:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) +; CHECK-LABEL: define void @patatino( +; CHECK-SAME: i64 [[N:%.*]], i64 [[I:%.*]], ptr [[P:%.*]]) !dbg [[DBG7:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: #dbg_value(i64 [[N]], [[META18:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i64 [[I]], [[META19:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) +; CHECK-NEXT: #dbg_value(ptr [[P]], [[META20:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: #dbg_value(i64 poison, [[META21:![0-9]+]], !DIExpression(), [[META27:![0-9]+]]) ; CHECK-NEXT: #dbg_value(i64 
poison, [[META22:![0-9]+]], !DIExpression(), [[META28:![0-9]+]]) ; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 [[I]], i32 0, !dbg [[DBG29:![0-9]+]] -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X1]], align 8, !dbg [[DBG26]], !tbaa [[TBAA30:![0-9]+]] -; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[X5]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA30]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X1]], align 8, !dbg [[DBG26]], !tbaa [[LONG_TBAA30:![0-9]+]] +; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[X5]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[LONG_TBAA30]] ; CHECK-NEXT: ret void, !dbg [[DBG35:![0-9]+]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll index e32e5f82991d9..2b6a41403fb48 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/store-ptr.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -7,28 +7,29 @@ target triple = "aarch64" %struct.node = type { i64, i64, ptr, ptr } define void @copy(ptr nocapture noundef writeonly %x, ptr nocapture noundef readonly %y, i32 noundef %n) { -; CHECK-LABEL: @copy( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP34:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: +; CHECK-LABEL: define void @copy( +; CHECK-SAME: ptr noundef writeonly captures(none) [[X:%.*]], ptr noundef readonly captures(none) [[Y:%.*]], i32 noundef [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP34:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP34]], label 
%[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], ptr [[Y:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[X:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA0]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], ptr [[Y]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[X]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 8, !tbaa [[LONG_TBAA0:![0-9]+]] +; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[ARRAYIDX2]], align 8, !tbaa [[LONG_TBAA0]] ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[Y]], i64 [[INDVARS_IV]], i32 2 ; CHECK-NEXT: [[C13:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[X]], i64 [[INDVARS_IV]], i32 2 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[C]], align 8, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: store <2 x ptr> [[TMP1]], ptr [[C13]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[C]], align 8, !tbaa [[ANYPTR_TBAA4:![0-9]+]] +; CHECK-NEXT: store <2 x ptr> [[TMP1]], ptr [[C13]], align 8, !tbaa [[ANYPTR_TBAA4]] ; CHECK-NEXT: 
[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; entry: %cmp34 = icmp sgt i32 %n, 0 @@ -74,3 +75,11 @@ for.body: !11 = !{!5, !9, i64 16} !12 = !{!5, !9, i64 24} +;. +; CHECK: [[LONG_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"long", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[ANYPTR_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"any pointer", [[META2]], i64 0} +;. diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll index 85b8157c949f1..541e76138e373 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -mtriple=s390x-unknown-linux -mcpu=z16 -S -passes=slp-vectorizer \ ; RUN: -pass-remarks-output=%t | FileCheck %s ; RUN: cat %t | FileCheck -check-prefix=REMARK %s @@ -147,8 +147,8 @@ define void @fun3(ptr %0) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 48 ; CHECK-NEXT: br label %[[BB5:.*]] ; CHECK: [[BB5]]: -; CHECK-NEXT: store ptr null, ptr [[TMP3]], align 8, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr inttoptr (i64 64 to ptr), align 8, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: store ptr null, ptr [[TMP3]], align 8, !tbaa [[ANYPTR_TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] 
= load ptr, ptr inttoptr (i64 64 to ptr), align 8, !tbaa [[ANYPTR_TBAA8:![0-9]+]] ; CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 [[TMP0]](ptr noundef poison, i64 noundef poison) ; CHECK-NEXT: br label %[[BB5]] @@ -177,7 +177,7 @@ define void @fun3(ptr %0) { !9 = !{!10, !7, i64 64} !10 = !{!"node", !6, i64 0, !3, i64 8, !7, i64 16, !7, i64 24, !7, i64 32, !7, i64 40, !7, i64 48, !7, i64 56, !7, i64 64, !7, i64 72, !6, i64 80, !6, i64 88, !3, i64 96, !3, i64 100} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META6:![0-9]+]], i64 40} +; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META6:![0-9]+]], i64 40} ; CHECK: [[META1]] = !{!"arc", [[META2:![0-9]+]], i64 0, [[META5:![0-9]+]], i64 8, [[META6]], i64 16, [[META6]], i64 24, [[META7:![0-9]+]], i64 32, [[META6]], i64 40, [[META6]], i64 48, [[META5]], i64 56, [[META5]], i64 64} ; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} @@ -185,6 +185,6 @@ define void @fun3(ptr %0) { ; CHECK: [[META5]] = !{!"long", [[META3]], i64 0} ; CHECK: [[META6]] = !{!"any pointer", [[META3]], i64 0} ; CHECK: [[META7]] = !{!"short", [[META3]], i64 0} -; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META6]], i64 64} +; CHECK: [[ANYPTR_TBAA8]] = !{[[META9:![0-9]+]], [[META6]], i64 64} ; CHECK: [[META9]] = !{!"node", [[META5]], i64 0, [[META2]], i64 8, [[META6]], i64 16, [[META6]], i64 24, [[META6]], i64 32, [[META6]], i64 40, [[META6]], i64 48, [[META6]], i64 56, [[META6]], i64 64, [[META6]], i64 72, [[META5]], i64 80, [[META5]], i64 88, [[META2]], i64 96, [[META2]], i64 100} ;. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index 95ae544e2c62f..6f0521066f0d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -1,38 +1,39 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-darwin13.3.0" define void @_foo(double %p1, double %p2, double %p3) #0 { -; CHECK-LABEL: @_foo( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @_foo( +; CHECK-SAME: double [[P1:%.*]], double [[P2:%.*]], double [[P3:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TAB1:%.*]] = alloca [256 x i32], align 16 ; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16 -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; 
CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, %[[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], %[[BB1]] ], [ [[TMP6:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] -; CHECK: return: +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[RETURN:.*]], label %[[FOR_BODY]] +; CHECK: [[RETURN]]: ; CHECK-NEXT: ret void ; entry: @@ -78,3 +79,9 @@ declare i32 @_xfn(<2 x double>) #4 !4 = !{!3, !3, i64 0} !5 = !{!"omnipotent char", !6, i64 0} !6 = !{!"Simple C/C++ TBAA"} +;. 
+; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +;. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index 1e31772b8e49e..2d9e1f79e827c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -1,38 +1,39 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-darwin13.3.0" define void @_foo(double %p1, double %p2, double %p3) #0 { -; CHECK-LABEL: @_foo( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @_foo( +; CHECK-SAME: double [[P1:%.*]], double [[P2:%.*]], double [[P3:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TAB1:%.*]] = alloca [256 x i32], align 16 ; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16 -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1]], i32 1 ; CHECK-NEXT: 
[[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, %[[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], %[[BB1]] ], [ [[TMP6:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] -; CHECK: return: +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[RETURN:.*]], label %[[FOR_BODY]] +; CHECK: [[RETURN]]: ; CHECK-NEXT: ret void ; entry: @@ -78,3 +79,9 @@ declare i32 @_xfn(<2 x double>) #4 !4 = !{!3, 
!3, i64 0} !5 = !{!"omnipotent char", !6, i64 0} !6 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +;. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll index c4bdfa804868e..635ec32ca055b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll @@ -1,16 +1,17 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" define void @test1(ptr %a, ptr %b, ptr %c) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !4 -; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[C:%.*]], align 8, !tbaa [[TBAA0]] +; CHECK-LABEL: define void @test1( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8, !tbaa [[DOUBLE_TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B]], align 8, !tbaa [[DOUBLE_TBAA0]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]], !fpmath [[META4:![0-9]+]] +; CHECK-NEXT: 
store <2 x double> [[TMP2]], ptr [[C]], align 8, !tbaa [[DOUBLE_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ -29,12 +30,13 @@ entry: } define void @test2(ptr %a, ptr %b, ptr %e) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !5 -; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[E:%.*]], align 8, !tbaa [[TBAA0]] +; CHECK-LABEL: define void @test2( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[E:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8, !tbaa [[DOUBLE_TBAA0]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B]], align 8, !tbaa [[DOUBLE_TBAA0]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]], !fpmath [[META5:![0-9]+]] +; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[E]], align 8, !tbaa [[DOUBLE_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ -52,10 +54,16 @@ entry: ret void } -;CHECK-DAG: !4 = !{float 5.000000e+00} -;CHECK-DAG: !5 = !{float 2.500000e+00} !0 = !{ float 5.0 } !1 = !{ float 2.5 } !2 = !{!"Simple C/C++ TBAA"} !3 = !{!"omnipotent char", !2} !4 = !{!"double", !3} +;. +; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[META4]] = !{float 5.000000e+00} +; CHECK: [[META5]] = !{float 2.500000e+00} +;. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll index ff4ef6086d42a..1b76ee970e6d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=i386--netbsd -mcpu=i486 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" target triple = "i386--netbsd" @@ -7,19 +7,20 @@ target triple = "i386--netbsd" ; Function Attrs: noreturn nounwind readonly define i32 @fn1() #0 { -; CHECK-LABEL: @fn1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a, align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-LABEL: define i32 @fn1( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @a, align 4, !tbaa [[ANYPTR_TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA4:![0-9]+]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: br label [[DO_BODY:%.*]] -; CHECK: do.body: -; CHECK-NEXT: [[C_0:%.*]] = phi i32 [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[DO_BODY]] ] -; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[TMP1]], [[ENTRY]] ], [ [[ADD:%.*]], [[DO_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[INT_TBAA4]] +; CHECK-NEXT: br label %[[DO_BODY:.*]] +; CHECK: [[DO_BODY]]: +; CHECK-NEXT: [[C_0:%.*]] = phi i32 [ [[TMP2]], %[[ENTRY]] ], [ [[ADD2:%.*]], %[[DO_BODY]] ] +; 
CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[TMP1]], %[[ENTRY]] ], [ [[ADD:%.*]], %[[DO_BODY]] ] ; CHECK-NEXT: [[ADD]] = add nsw i32 [[B_0]], [[C_0]] ; CHECK-NEXT: [[ADD2]] = add nsw i32 [[ADD]], 1 -; CHECK-NEXT: br label [[DO_BODY]] +; CHECK-NEXT: br label %[[DO_BODY]] ; entry: %0 = load ptr, ptr @a, align 4, !tbaa !4 @@ -44,3 +45,11 @@ attributes #0 = { noreturn nounwind readonly "less-precise-fpmad"="false" "frame !3 = !{!"int", !1} !4 = !{!0, !0, i64 0} !5 = !{!3, !3, i64 0} +;. +; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} +; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"int", [[META2]]} +;. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll index 6fd2de8ad8ab5..618c316c6f2fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll @@ -1,15 +1,16 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-- -mcpu=corei7 < %s | FileCheck %s define void @test1(float %a, float %b, float %c, float %d, ptr nocapture %p) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C:%.*]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D:%.*]], i32 3 +; CHECK-LABEL: define void @test1( +; CHECK-SAME: float [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]], ptr captures(none) 
[[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -28,14 +29,15 @@ entry: } define void @test1_vec(float %a, float %b, float %c, float %d, ptr nocapture %p) { -; CHECK-LABEL: @test1_vec( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C:%.*]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D:%.*]], i32 3 +; CHECK-LABEL: define void @test1_vec( +; CHECK-SAME: float [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]], ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 16, !tbaa [[TBAA0]] +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P]], align 16, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ 
-52,14 +54,15 @@ entry: } define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture %p) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D:%.*]], i32 3 +; CHECK-LABEL: define void @test2( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]], ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[B]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 1) -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ -78,13 +81,14 @@ entry: } define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) { -; CHECK-LABEL: @test2_vec( -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1:%.*]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2:%.*]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3:%.*]], i32 3 +; CHECK-LABEL: define void @test2_vec( +; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], ptr captures(none) [[TMP4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 +; 
CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], splat (i32 1) -; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP4:%.*]], align 16, !tbaa [[TBAA0]] +; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP4]], align 16, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: ret void ; %6 = add nsw i32 %0, 1 @@ -103,3 +107,9 @@ define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) { !3 = !{!"int", !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C++ TBAA"} +;. +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +;. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll index 9e4f10ec7b349..9c8ba07734b87 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -slp-threshold=-1 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s @@ -6,15 +6,16 @@ ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,+avx512vl | FileCheck %s define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) { -; CHECK-LABEL: @store_i32( -; CHECK-NEXT: 
[[TMP4:%.*]] = load <4 x i32>, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i32 0 +; CHECK-LABEL: define void @store_i32( +; CHECK-SAME: ptr captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], splat (i32 15) ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x i32> [[TMP8]], splat (i32 255) ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> splat (i32 255) -; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA0]] ; CHECK-NEXT: ret void ; %4 = load i32, ptr %0, align 4, !tbaa !2 @@ -48,17 +49,18 @@ define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) { } define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) { -; CHECK-LABEL: @store_i8( -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[TMP0:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]] +; CHECK-LABEL: define void @store_i8( +; CHECK-SAME: ptr captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1, !tbaa [[CHAR_TBAA4:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] 
= mul <4 x i32> [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], splat (i32 15) ; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], splat (i32 255) ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> splat (i32 255) ; CHECK-NEXT: [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8> -; CHECK-NEXT: store <4 x i8> [[TMP12]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]] +; CHECK-NEXT: store <4 x i8> [[TMP12]], ptr [[TMP0]], align 1, !tbaa [[CHAR_TBAA4]] ; CHECK-NEXT: ret void ; %4 = load i8, ptr %0, align 1, !tbaa !6 @@ -100,9 +102,10 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) { } define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) { -; CHECK-LABEL: @store_i64( -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]] +; CHECK-LABEL: define void @store_i64( +; CHECK-SAME: ptr captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8, !tbaa [[LONG_TBAA5:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]] @@ -111,7 +114,7 @@ define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], splat (i32 255) ; CHECK-NEXT: [[TMP12:%.*]] = and <4 x i64> [[TMP9]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> splat (i64 255) -; CHECK-NEXT: store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]] +; CHECK-NEXT: store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[LONG_TBAA5]] ; CHECK-NEXT: ret void ; %4 = zext i32 %1 to i64 @@ 
-160,3 +163,12 @@ define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) { !6 = !{!4, !4, i64 0} !7 = !{!8, !8, i64 0} !8 = !{!"long", !4, i64 0} +;. +; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +; CHECK: [[CHAR_TBAA4]] = !{[[META2]], [[META2]], i64 0} +; CHECK: [[LONG_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"long", [[META2]], i64 0} +;. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index db38a62017391..fde76f8b0e8b9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 @@ -6,99 +6,104 @@ ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load( -; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; SSE-LABEL: define void @gather_load( +; 
SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; SSE-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; SSE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; SSE-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load( -; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 
+; AVX-LABEL: define void @gather_load( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load( -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX2-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; AVX2-LABEL: define void @gather_load( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX2-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX2-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX2-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load( -; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512F-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; AVX512F-LABEL: define void @gather_load( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX512F-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX512F-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX512F-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; 
AVX512VL-LABEL: @gather_load( -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; AVX512VL-LABEL: define void @gather_load( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX512VL-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX512VL-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX512VL-NEXT: store <4 x 
i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -121,78 +126,83 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl } define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load_2( -; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; SSE-LABEL: define void @gather_load_2( +; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1 -; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 -; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 +; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10 -; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2 ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 -; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3 -; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3 ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5 -; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4 -; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_2( -; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX-LABEL: define void @gather_load_2( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10 -; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3 -; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5 -; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; 
AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2 ; AVX-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3 ; AVX-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], -; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_2( -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX2-LABEL: define void @gather_load_2( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10 -; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3 -; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5 -; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1 ; AVX2-NEXT: [[TMP13:%.*]] = 
insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2 ; AVX2-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3 ; AVX2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], -; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_2( -; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> , <10 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_2( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> , <10 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> ; AVX512F-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], ; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> -; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_2( -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> , <10 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-LABEL: define void @gather_load_2( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, 
ptr [[TMP1]], i64 1 +; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> , <10 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> ; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], ; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> -; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -219,63 +229,65 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load_3( -; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; SSE-LABEL: define void @gather_load_3( +; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1 -; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 -; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 +; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 -; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa 
[[TBAA0]] +; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15 -; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4 ; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 -; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18 -; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1 ; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 5 -; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9 -; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2 ; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr 
[[TMP0]], i64 6 -; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6 -; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3 ; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 7 -; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21 -; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4 -; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_3( -; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-LABEL: define void @gather_load_3( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15 -; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18 -; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9 -; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6 -; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21 -; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i32 0 ; AVX-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i32 1 ; AVX-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i32 2 @@ -285,31 +297,34 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i32 6 ; AVX-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i32 7 ; AVX-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], -; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_3( -; AVX2-NEXT: 
[[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-LABEL: define void @gather_load_3( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> ; AVX2-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> -; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_3( -; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_3( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512F-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> -; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_3( -; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; 
AVX512VL-LABEL: define void @gather_load_3( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512VL-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> -; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load i32, ptr %1, align 4, !tbaa !2 @@ -354,9 +369,10 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado } define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) { -; SSE-LABEL: @gather_load_4( -; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0:%.*]], i64 1 -; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11 +; SSE-LABEL: define void @gather_load_4( +; SSE-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 1 +; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11 ; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 2 ; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4 ; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 3 @@ -369,14 +385,14 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6 ; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 7 ; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds 
i32, ptr [[T1]], i64 21 -; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1 ; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2 ; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3 @@ -385,32 +401,33 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2 ; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3 ; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4 -; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store 
i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_4( -; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11 +; AVX-LABEL: define void @gather_load_4( +; AVX-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11 ; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4 ; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15 ; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18 ; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9 ; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6 ; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21 -; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T23:%.*]] = load i32, ptr 
[[T22]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i32 0 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i32 1 ; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i32 2 @@ -420,31 +437,34 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i32 6 ; AVX-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i32 7 ; AVX-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], -; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_4( -; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-LABEL: define void @gather_load_4( +; AVX2-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> , 
<22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_4( -; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_4( +; AVX512F-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> ; AVX512F-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_4( -; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-LABEL: define void @gather_load_4( +; AVX512VL-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> ; 
AVX512VL-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %t5 = getelementptr inbounds i32, ptr %t0, i64 1 @@ -494,17 +514,18 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load_div( -; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; SSE-LABEL: define void @gather_load_div( +; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3 ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44 -; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 4 -; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 +; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], 
align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 ; SSE-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> @@ -516,23 +537,23 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP14]], <4 x i32> ; SSE-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i32 3 ; SSE-NEXT: [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]] -; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; SSE-NEXT: 
[[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i32 0 ; SSE-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i32 1 ; SSE-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP33]], i32 2 @@ -542,35 +563,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP35]], i32 2 ; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP39]], i32 3 ; SSE-NEXT: [[TMP48:%.*]] = fdiv <4 x float> [[TMP43]], [[TMP47]] -; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_div( -; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-LABEL: define void @gather_load_div( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP3:%.*]] = 
load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3 ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44 -; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa 
[[SHORT_TBAA0]] ; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0 ; AVX-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> @@ -590,35 +612,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6 ; AVX-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7 ; AVX-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]] -; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_div( -; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-LABEL: define 
void @gather_load_div( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44 -; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP19:%.*]] = getelementptr 
inbounds float, ptr [[TMP1]], i64 27 -; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0 ; AVX2-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> ; AVX2-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> @@ -638,27 +661,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX2-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6 ; AVX2-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7 ; AVX2-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]] -; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <8 x float> 
[[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_div( -; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_div( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> ; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512F-NEXT: [[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512F-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> -; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_div( -; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-LABEL: define void @gather_load_div( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> ; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512VL-NEXT: 
[[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512VL-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> -; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load float, ptr %1, align 4, !tbaa !2 @@ -722,3 +747,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea !3 = !{!"short", !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C++ TBAA"} +;. +; SSE: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; SSE: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; SSE: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; SSE: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX2: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX2: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX2: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX2: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX512F: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX512F: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX512F: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX512F: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX512VL: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX512VL: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX512VL: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX512VL: [[META3]] = !{!"Simple C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index bfa3610804967..cf380f04a6939 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 @@ -6,99 +6,104 @@ ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load( -; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; SSE-LABEL: define void @gather_load( +; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 ; SSE-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; SSE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; SSE-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load( -; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; AVX-LABEL: define void @gather_load( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa 
[[SHORT_TBAA0]] ; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 ; AVX-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load( -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; AVX2-LABEL: define void @gather_load( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 ; AVX2-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX2-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX2-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load( -; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; AVX512F-LABEL: define void @gather_load( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0:![0-9]+]] +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], 
i64 11 -; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 ; AVX512F-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX512F-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX512F-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load( -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 +; AVX512VL-LABEL: define void @gather_load( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa 
[[SHORT_TBAA0:![0-9]+]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 ; AVX512VL-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP7]], i32 1 ; AVX512VL-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP10]], i32 2 ; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 3 ; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -121,78 +126,83 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl } define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load_2( -; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; SSE-LABEL: 
define void @gather_load_2( +; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1 -; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 -; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 +; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10 -; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2 ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 -; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3 -; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3 ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5 -; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4 -; 
SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_2( -; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX-LABEL: define void @gather_load_2( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10 -; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3 -; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5 -; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2 ; AVX-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3 ; AVX-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], -; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_2( -; 
AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX2-LABEL: define void @gather_load_2( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10 -; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3 -; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5 -; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 ; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i32 1 ; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i32 2 ; AVX2-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 3 ; AVX2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], -; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_2( -; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 
x i1> , <10 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_2( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX512F-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> , <10 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> ; AVX512F-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], ; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> -; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_2( -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> , <10 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-LABEL: define void @gather_load_2( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; AVX512VL-NEXT: [[TMP4:%.*]] = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr [[TMP3]], i32 4, <10 x i1> , <10 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP5:%.*]] = shufflevector <10 x i32> [[TMP4]], <10 x i32> poison, <4 x i32> ; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], ; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> -; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; 
AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -219,63 +229,65 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load_3( -; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; SSE-LABEL: define void @gather_load_3( +; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1 -; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1 -; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1 +; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 -; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3 -; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; 
SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15 -; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4 ; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 -; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18 -; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1 ; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 5 -; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9 -; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2 ; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 6 -; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6 -; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3 ; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 7 -; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]] +; 
SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21 -; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4 -; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_3( -; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-LABEL: define void @gather_load_3( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15 -; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18 -; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9 -; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]] 
+; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6 -; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21 -; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i32 0 ; AVX-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i32 1 ; AVX-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i32 2 @@ -285,31 +297,34 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i32 6 ; AVX-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i32 7 ; AVX-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], -; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_3( -; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-LABEL: define void @gather_load_3( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> ; AVX2-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX2-NEXT: [[TMP6:%.*]] = shufflevector 
<8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> -; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_3( -; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_3( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512F-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> -; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_3( -; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-LABEL: define void @gather_load_3( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512VL-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> -; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], 
align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load i32, ptr %1, align 4, !tbaa !2 @@ -354,9 +369,10 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado } define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) { -; SSE-LABEL: @gather_load_4( -; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0:%.*]], i64 1 -; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11 +; SSE-LABEL: define void @gather_load_4( +; SSE-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 1 +; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11 ; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 2 ; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4 ; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 3 @@ -369,14 +385,14 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6 ; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 7 ; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21 -; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]] +; 
SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1 ; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2 ; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3 @@ -385,32 +401,33 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2 ; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3 ; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4 -; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa 
[[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_4( -; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11 +; AVX-LABEL: define void @gather_load_4( +; AVX-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 11 ; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4 ; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15 ; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18 ; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9 ; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6 ; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21 -; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T23:%.*]] = load i32, ptr 
[[T22]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i32 0 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i32 1 ; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i32 2 @@ -420,31 +437,34 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i32 6 ; AVX-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i32 7 ; AVX-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], -; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_4( -; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-LABEL: define void @gather_load_4( +; AVX2-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_4( -; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> 
poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_4( +; AVX512F-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> ; AVX512F-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_4( -; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-LABEL: define void @gather_load_4( +; AVX512VL-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> ; AVX512VL-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %t5 = getelementptr inbounds i32, ptr %t0, i64 1 @@ -494,17 +514,18 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { -; SSE-LABEL: @gather_load_div( -; 
SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; SSE-LABEL: define void @gather_load_div( +; SSE-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3 ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44 -; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 4 -; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 +; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]] +; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 ; SSE-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> @@ -516,23 +537,23 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP14]], <4 x i32> ; SSE-NEXT: [[TMP22:%.*]] 
= insertelement <4 x float> [[TMP21]], float [[TMP8]], i32 3 ; SSE-NEXT: [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]] -; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, 
!tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i32 0 ; SSE-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i32 1 ; SSE-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP33]], i32 2 @@ -542,35 +563,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP35]], i32 2 ; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP39]], i32 3 ; SSE-NEXT: [[TMP48:%.*]] = fdiv <4 x float> [[TMP43]], [[TMP47]] -; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; SSE-NEXT: ret void ; -; AVX-LABEL: @gather_load_div( -; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-LABEL: define void @gather_load_div( +; AVX-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3 ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44 -; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; AVX-NEXT: 
[[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] -; AVX-NEXT: [[TMP27:%.*]] = load 
<2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0 ; AVX-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> @@ -590,35 +612,36 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6 ; AVX-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7 ; AVX-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]] -; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX-NEXT: ret void ; -; AVX2-LABEL: @gather_load_div( -; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-LABEL: define void @gather_load_div( +; AVX2-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44 -; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; 
AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]] -; 
AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[SHORT_TBAA0]] +; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0 ; AVX2-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> ; AVX2-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> @@ -638,27 +661,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX2-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i32 6 ; AVX2-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i32 7 ; AVX2-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]] -; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX2-NEXT: ret void ; -; AVX512F-LABEL: @gather_load_div( -; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[TBAA0]] +; AVX512F-LABEL: define void @gather_load_div( +; AVX512F-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512F-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: [[TMP4:%.*]] = 
shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> ; AVX512F-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512F-NEXT: [[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512F-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> -; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512F-NEXT: ret void ; -; AVX512VL-LABEL: @gather_load_div( -; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1:%.*]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-LABEL: define void @gather_load_div( +; AVX512VL-SAME: ptr noalias captures(none) [[TMP0:%.*]], ptr noalias readonly captures(none) [[TMP1:%.*]]) #[[ATTR0]] { +; AVX512VL-NEXT: [[TMP3:%.*]] = call <45 x float> @llvm.masked.load.v45f32.p0(ptr [[TMP1]], i32 4, <45 x i1> , <45 x float> poison), !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <16 x i32> ; AVX512VL-NEXT: [[TMP7:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512VL-NEXT: [[TMP8:%.*]] = shufflevector <45 x float> [[TMP3]], <45 x float> poison, <8 x i32> ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512VL-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> -; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: store <8 x float> [[TMP10]], ptr [[TMP0]], align 4, !tbaa [[SHORT_TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load float, ptr %1, align 4, !tbaa !2 @@ -722,3 +747,29 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea !3 = !{!"short", 
!4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C++ TBAA"} +;. +; SSE: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; SSE: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; SSE: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; SSE: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX2: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX2: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX2: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX2: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX512F: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX512F: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX512F: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX512F: [[META3]] = !{!"Simple C++ TBAA"} +;. +; AVX512VL: [[SHORT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; AVX512VL: [[META1]] = !{!"short", [[META2:![0-9]+]], i64 0} +; AVX512VL: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; AVX512VL: [[META3]] = !{!"Simple C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll index 26258402b9781..253f08450a2b7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll @@ -1,13 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-- -mcpu=skylake-avx512 | FileCheck %s define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) { -; CHECK-LABEL: @foo( -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[T1:%.*]], align 1, !tbaa [[TBAA0:![0-9]+]] +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ptr noalias captures(none) [[T0:%.*]], ptr noalias readonly captures(none) [[T1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[T1]], align 1, !tbaa [[CHAR_TBAA0:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], splat (i8 64) ; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP2]], <8 x i8> [[TMP4]] -; CHECK-NEXT: store <8 x i8> [[TMP5]], ptr [[T0:%.*]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: store <8 x i8> [[TMP5]], ptr [[T0]], align 1, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: ret void ; %t3 = load i8, ptr %t1, align 1, !tbaa !3 @@ -70,3 +71,8 @@ define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) !3 = !{!4, !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C++ TBAA"} +;. +; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"Simple C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll index 2cd7adaad969f..b409aa74acd48 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll @@ -1,26 +1,27 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s define void @vsub2_test(ptr %pin1, ptr %pin2, ptr %pout) #0 { -; CHECK-LABEL: @vsub2_test( -; CHECK-NEXT: br label [[TMP1:%.*]] -; CHECK: 1: -; CHECK-NEXT: [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ] -; CHECK-NEXT: [[PO_03:%.*]] = phi ptr [ [[POUT:%.*]], [[TMP0]] ], [ [[TMP7:%.*]], [[TMP1]] ] -; CHECK-NEXT: [[PTMPI2_02:%.*]] = phi ptr [ [[PIN2:%.*]], [[TMP0]] ], [ [[TMP4:%.*]], [[TMP1]] ] -; CHECK-NEXT: [[PTMPI1_01:%.*]] = phi ptr [ [[PIN1:%.*]], [[TMP0]] ], [ [[TMP2:%.*]], [[TMP1]] ] +; CHECK-LABEL: define void @vsub2_test( +; CHECK-SAME: ptr [[PIN1:%.*]], ptr [[PIN2:%.*]], ptr [[POUT:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: br label %[[TMP1:.*]] +; CHECK: [[TMP1]]: +; CHECK-NEXT: [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], %[[TMP1]] ] +; CHECK-NEXT: [[PO_03:%.*]] = phi ptr [ [[POUT]], [[TMP0]] ], [ [[TMP7:%.*]], %[[TMP1]] ] +; CHECK-NEXT: [[PTMPI2_02:%.*]] = phi ptr [ [[PIN2]], [[TMP0]] ], [ [[TMP4:%.*]], %[[TMP1]] ] +; CHECK-NEXT: [[PTMPI1_01:%.*]] = phi ptr [ [[PIN1]], [[TMP0]] ], [ [[TMP2:%.*]], %[[TMP1]] ] ; CHECK-NEXT: [[TMP2]] = getelementptr inbounds i32, ptr [[PTMPI1_01]], i64 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTMPI1_01]], align 4, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTMPI1_01]], align 4, !tbaa 
[[INT_TBAA1:![0-9]+]] ; CHECK-NEXT: [[TMP4]] = getelementptr inbounds i32, ptr [[PTMPI2_02]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[PTMPI2_02]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[PTMPI2_02]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw i32 [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7]] = getelementptr inbounds i32, ptr [[PO_03]], i64 1 -; CHECK-NEXT: store i32 [[TMP6]], ptr [[PO_03]], align 4, !tbaa [[TBAA1]] +; CHECK-NEXT: store i32 [[TMP6]], ptr [[PO_03]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP8]] = add nuw nsw i32 [[IDX_04]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP8]], 64 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: 9: +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB9:.*]], label %[[TMP1]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[BB9]]: ; CHECK-NEXT: ret void ; br label %1 @@ -61,3 +62,12 @@ define void @vsub2_test(ptr %pin1, ptr %pin2, ptr %pout) #0 { !5 = distinct !{!5, !6, !7} !6 = !{!"llvm.loop.vectorize.width", i32 1} !7 = !{!"llvm.loop.interleave.count", i32 1} +;. +; CHECK: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]} +; CHECK: [[META6]] = !{!"llvm.loop.vectorize.width", i32 1} +; CHECK: [[META7]] = !{!"llvm.loop.interleave.count", i32 1} +;. 
diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll index 545fa47eecb2c..be91a87b6175d 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct2.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -S -passes='sroa' %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt -S -passes='sroa' %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG @@ -9,11 +9,12 @@ declare void @llvm.memcpy.p0.p0.i64(ptr writeonly, ptr readonly, i64, i1 immarg) declare double @subcall(double %g, i32 %m) define double @bar(ptr %wishart) { -; CHECK-LABEL: @bar( +; CHECK-LABEL: define double @bar( +; CHECK-SAME: ptr [[WISHART:%.*]]) { ; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4 -; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART]], align 8, !tbaa [[DOUBLE_TBAA0:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8 -; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[INT_TBAA4:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT6:![0-9]+]] ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]]) @@ -38,11 +39,11 @@ 
define double @bar(ptr %wishart) { ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} ; CHECK: [[META5]] = !{!"int", [[META2]], i64 0} ; CHECK: [[TBAA_STRUCT6]] = !{} ;. diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 5326b9802ec6d..6a0cacc7016f7 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -p sroa -S %s | FileCheck %s @@ -7,12 +7,12 @@ target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32- define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(24) %res, float %a, float %b) { ; CHECK-LABEL: define void @load_store_transfer_split_struct_tbaa_2_float( ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[B]] to i32 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa [[FLOAT_TBAA0:![0-9]+]] ; CHECK-NEXT: [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4 -; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, 
!tbaa [[TBAA0]] +; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[RES]], align 8 ; CHECK-NEXT: ret void ; @@ -30,11 +30,11 @@ entry: define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) { ; CHECK-LABEL: define void @memcpy_transfer( ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8 -; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 -; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: ret void ; entry: @@ -50,9 +50,9 @@ entry: define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceable(24) %res, float %a, float %b) { ; CHECK-LABEL: define void @memcpy_transfer_tbaa_field_and_size_do_not_align( ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8 -; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B]] to i32 ; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 @@ -72,7 +72,7 @@ entry: define void @load_store_transfer_split_struct_tbaa_2_i31(ptr dereferenceable(24) %res, i31 %a, i31 %b) { ; CHECK-LABEL: define void 
@load_store_transfer_split_struct_tbaa_2_i31( ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], i31 [[A:%.*]], i31 [[B:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP:%.*]] = alloca { i31, i31 }, align 4 ; CHECK-NEXT: store i31 [[A]], ptr [[TMP]], align 4 ; CHECK-NEXT: [[TMP_4_TMP_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 4 @@ -98,9 +98,9 @@ define void @store_vector_part_first(ptr %y2, float %f) { ; CHECK-LABEL: define void @store_vector_part_first( ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) -; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa [[V2F32_TBAA5:![0-9]+]] ; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 8 -; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float, float, float } @@ -116,9 +116,9 @@ define void @store_vector_part_second(ptr %y2, float %f) { ; CHECK-LABEL: define void @store_vector_part_second( ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) -; CHECK-NEXT: store float [[F]], ptr [[Y2]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: store float [[F]], ptr [[Y2]], align 8, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 4 -; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[TBAA5]] +; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float, float, float } @@ -134,7 +134,7 @@ define void @store_vector_single(ptr %y2, float %f) { ; 
CHECK-LABEL: define void @store_vector_single( ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) -; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa [[TBAA5]] +; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float } @@ -149,7 +149,7 @@ declare void @llvm.memset.p0.i8(ptr nocapture, i8, i32, i1) nounwind define void @memset(ptr %dst, ptr align 8 %src) { ; CHECK-LABEL: define void @memset( ; CHECK-SAME: ptr [[DST:%.*]], ptr align 8 [[SRC:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [7 x i8], align 1 ; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[A_SROA_4:%.*]] = alloca [10 x i8], align 1 @@ -162,7 +162,7 @@ define void @memset(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: store i16 1, ptr [[A_SROA_3]], align 2 ; CHECK-NEXT: [[A_SROA_0_1_A_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1 ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false) -; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[TBAA0]] +; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 7, i1 true) ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 7 ; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A_SROA_3]], align 2 @@ -187,7 +187,7 @@ entry: define void @memset2(ptr %dst, ptr align 8 %src) { ; CHECK-LABEL: define void @memset2( ; CHECK-SAME: ptr [[DST:%.*]], ptr align 8 [[SRC:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [209 x i8], align 1 ; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[A_SROA_4:%.*]] = 
alloca [90 x i8], align 1 @@ -199,8 +199,8 @@ define void @memset2(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 2 [[A_SROA_4_0_SRC_SROA_IDX]], i32 90, i1 false) ; CHECK-NEXT: store i8 1, ptr [[A_SROA_3]], align 1 ; CHECK-NEXT: [[A_SROA_0_202_A_202_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 202 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa [[TBAA5]] -; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[TBAA5]] +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa [[V2F32_TBAA5]] +; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 209, i1 true) ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 209 ; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i8, ptr [[A_SROA_3]], align 1 @@ -233,7 +233,7 @@ entry: define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-LABEL: define void @slice_store_v2i8_1( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [6 x i8], align 1 ; CHECK-NEXT: [[A_SROA_2_SROA_0:%.*]] = alloca <2 x i8>, align 4 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 8 [[SRC]], i32 6, i1 false) @@ -268,7 +268,7 @@ entry: define void @slice_store_v2i8_2(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-LABEL: define void @slice_store_v2i8_2( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0_SROA_1:%.*]] = alloca <2 x i8>, align 2 ; CHECK-NEXT: [[A_SROA_0_SROA_4:%.*]] = alloca i8, align 1 ; 
CHECK-NEXT: [[A_SROA_4:%.*]] = alloca [5 x i8], align 1 @@ -317,7 +317,7 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8 ; CHECK-NEXT: [[TMP_SROA_3_0_COPYLOAD:%.*]] = load i64, ptr [[TMP_SROA_3_0_SRC_SROA_IDX]], align 8 ; CHECK-NEXT: store i64 [[TMP_SROA_3_0_COPYLOAD]], ptr [[TMP_SROA_3]], align 8 -; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[TBAA5]] +; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1:%.*]] = load volatile double, ptr [[TMP_SROA_0]], align 8 ; CHECK-NEXT: store volatile double [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 8 ; CHECK-NEXT: [[TMP_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8 @@ -335,9 +335,9 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) { define i32 @shorten_integer_store_single_field(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-LABEL: define i32 @shorten_integer_store_single_field( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[FLOAT_TBAA0]] ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1 @@ -354,9 +354,9 @@ entry: define i32 @shorten_integer_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-LABEL: define i32 @shorten_integer_store_multiple_fields( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) 
{ -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA5]] +; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1 @@ -373,7 +373,7 @@ entry: define <2 x i16> @shorten_vector_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-LABEL: define <2 x i16> @shorten_vector_store_multiple_fields( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8 ; CHECK-NEXT: store <2 x i32> , ptr [[A_SROA_0]], align 8 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8 @@ -391,7 +391,7 @@ entry: define <2 x i16> @shorten_vector_store_single_fields(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-LABEL: define <2 x i16> @shorten_vector_store_single_fields( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8 ; CHECK-NEXT: store <2 x i32> , ptr [[A_SROA_0]], align 8 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8 @@ -409,7 +409,7 @@ entry: define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-LABEL: define i32 @split_load_with_tbaa_struct( ; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A3_SROA_0:%.*]] = alloca i16, align 8 ; CHECK-NEXT: [[A3_SROA_3:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[A3_SROA_33:%.*]] = alloca float, align 4 @@ 
-429,11 +429,11 @@ define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-NEXT: [[A3_SROA_5_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9 ; CHECK-NEXT: [[A3_SROA_5_0_COPYLOAD:%.*]] = load i8, ptr [[A3_SROA_5_0_SRC_SROA_IDX]], align 1 ; CHECK-NEXT: store i8 [[A3_SROA_5_0_COPYLOAD]], ptr [[A3_SROA_5]], align 1 -; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA5]] +; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[LOAD4_FCA_0_INSERT:%.*]] = insertvalue { i16, float, i8 } poison, i16 [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD]], 0 -; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA5]] +; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[LOAD4_FCA_1_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_0_INSERT]], float [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD]], 1 -; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA5]] +; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[LOAD4_FCA_2_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_1_INSERT]], i8 [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD]], 2 ; CHECK-NEXT: [[UNWRAP2:%.*]] = extractvalue { i16, float, i8 } [[LOAD4_FCA_2_INSERT]], 1 ; CHECK-NEXT: [[VALCAST2:%.*]] = bitcast float [[UNWRAP2]] to i32 @@ -468,7 +468,7 @@ entry: define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-LABEL: define i32 @split_store_with_tbaa_struct( ; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) { -; CHECK-NEXT: entry: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A3_SROA_0:%.*]] = 
alloca i16, align 8 ; CHECK-NEXT: [[A3_SROA_3:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[A3_SROA_33:%.*]] = alloca float, align 4 @@ -492,11 +492,11 @@ define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-NEXT: [[I_2:%.*]] = insertvalue { i16, float, i8 } [[I_1]], float 3.000000e+00, 1 ; CHECK-NEXT: [[I_3:%.*]] = insertvalue { i16, float, i8 } [[I_2]], i8 99, 2 ; CHECK-NEXT: [[I_3_FCA_0_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 0 -; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA5]] +; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[I_3_FCA_1_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 1 -; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA5]] +; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[I_3_FCA_2_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 2 -; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA5]] +; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[V2F32_TBAA5]] ; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A3_SROA_0]], align 8 ; CHECK-NEXT: store volatile i16 [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 1 ; CHECK-NEXT: [[A3_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2 @@ -548,11 +548,11 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias !15 = !{i64 0, i64 7, !6, i64 7, i64 1, !6} !16 = !{i64 0, i64 2, !6, i64 4, i64 4, !6, i64 8, i64 1, !6} ;. 
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[FLOAT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]], i64 0} ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA_STRUCT4]] = !{i64 0, i64 4, [[TBAA0]], i64 4, i64 4, [[TBAA0]]} -; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[TBAA_STRUCT4]] = !{i64 0, i64 4, [[FLOAT_TBAA0]], i64 4, i64 4, [[FLOAT_TBAA0]]} +; CHECK: [[V2F32_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} ; CHECK: [[META6]] = !{!"v2f32", [[META2]], i64 0} ;. diff --git a/llvm/test/Transforms/SROA/tbaa-subload.ll b/llvm/test/Transforms/SROA/tbaa-subload.ll index b07874da7ab03..4c18006a4d1cb 100644 --- a/llvm/test/Transforms/SROA/tbaa-subload.ll +++ b/llvm/test/Transforms/SROA/tbaa-subload.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -S -passes='sroa' %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt -S -passes='sroa' %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG @@ -8,14 +8,14 @@ %class.anon = type <{ %class.ar, [7 x i8], { i64, i64 } }> define void @caller() { -; CHECK-LABEL: @caller( -; CHECK-NEXT: entry: +; CHECK-LABEL: define void @caller() { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[AGG:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 ; CHECK-NEXT: [[OFF:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[AGG]], i32 0, i32 2 ; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds { i64, i64 }, ptr [[OFF]], i32 0, i32 0 -; CHECK-NEXT: store i64 1, ptr [[DOTFCA_0_GEP]], align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store i64 1, ptr [[DOTFCA_0_GEP]], align 8, !tbaa [[CHAR_TBAA0:![0-9]+]] ; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr 
inbounds { i64, i64 }, ptr [[OFF]], i32 0, i32 1 -; CHECK-NEXT: store i64 2, ptr [[DOTFCA_1_GEP]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: store i64 2, ptr [[DOTFCA_1_GEP]], align 8, !tbaa [[CHAR_TBAA0]] ; CHECK-NEXT: call void @use(ptr [[AGG]]) ; CHECK-NEXT: ret void ; @@ -36,11 +36,11 @@ declare void @use(ptr %this) !8 = !{!"_ZTSZN2ax2baEMS_FvvE2an2arE3$_0", !9, i64 0, !3, i64 8} !9 = !{!"_ZTS2ar"} ;. -; CHECK: [[TBAA0]] = !{!1, !3, i64 8} -; CHECK: [[META1:![0-9]+]] = !{!"_ZTSZN2ax2baEMS_FvvE2an2arE3$_0", !2, i64 0, !3, i64 8} -; CHECK: [[META2:![0-9]+]] = !{!"_ZTS2ar"} -; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0} -; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"} +; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META3:![0-9]+]], i64 8} +; CHECK: [[META1]] = !{!"_ZTSZN2ax2baEMS_FvvE2an2arE3$_0", [[META2:![0-9]+]], i64 0, [[META3]], i64 8} +; CHECK: [[META2]] = !{!"_ZTS2ar"} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C++ TBAA"} ;. ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll index 6cb94e8f561bc..af152d4ba8d05 100644 --- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll +++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt %s -passes='function(scalarizer,dce)' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -6,21 +6,22 @@ declare <4 x float> @ext(<4 x float>) @g = global <4 x float> zeroinitializer define void @f1(<4 x float> %init, ptr %base, i32 %count) { -; CHECK-LABEL: @f1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0 +; CHECK-LABEL: define void @f1( +; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT]], i64 0 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1 ; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], 
[[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]] ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4 @@ -54,8 +55,8 @@ define void @f1(<4 x float> %init, ptr %base, i32 %count) { ; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8 ; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 -; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -92,21 +93,22 @@ exit: } define void @f2(<4 x i32> %init, ptr %base, i32 %count) { -; CHECK-LABEL: @f2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0 +; CHECK-LABEL: define void @f2( +; CHECK-SAME: <4 x i32> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT]], i64 0 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1 ; CHECK-NEXT: [[INIT_I2:%.*]] = 
extractelement <4 x i32> [[INIT]], i64 2 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE]], i32 [[I]] ; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1 @@ -139,8 +141,8 @@ define void @f2(<4 x i32> %init, ptr %base, i32 %count) { ; CHECK-NEXT: store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2 ; CHECK-NEXT: store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 -; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]] +; 
CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -172,25 +174,26 @@ exit: ; Check that !tbaa information is preserved. define void @f3(ptr %src, ptr %dst) { -; CHECK-LABEL: @f3( -; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1 +; CHECK-LABEL: define void @f3( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa [[SET1_TBAA0:![0-9]+]] ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1 -; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[SET1_TBAA0]] ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2 -; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[SET1_TBAA0]] ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3 -; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[SET1_TBAA0]] ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] -; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], 
align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[SET2_TBAA3:![0-9]+]] +; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[SET2_TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[SET2_TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[SET2_TBAA3]] ; CHECK-NEXT: ret void ; %val = load <4 x i32> , ptr %src, !tbaa !1 @@ -201,11 +204,12 @@ define void @f3(ptr %src, ptr %dst) { ; Check that !tbaa.struct information is preserved. define void @f4(ptr %src, ptr %dst) { -; CHECK-LABEL: @f4( -; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1 +; CHECK-LABEL: define void @f4( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]] ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2 @@ -230,16 +234,17 @@ define void @f4(ptr %src, ptr %dst) { ; Check that llvm.access.group information is preserved. 
define void @f5(i32 %count, ptr %src, ptr %dst) { -; CHECK-LABEL: @f5( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]] +; CHECK-LABEL: define void @f5( +; CHECK-SAME: i32 [[COUNT:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT_INDEX:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1 ; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2 ; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3 -; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1 ; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2 ; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3 @@ -256,9 +261,9 @@ define void @f5(i32 %count, ptr %src, ptr %dst) { ; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]] ; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]] ; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1 -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]] -; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: end: +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT]] +; CHECK-NEXT: br i1 [[CONTINUE]], label %[[LOOP]], label %[[END:.*]], 
!llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[END]]: ; CHECK-NEXT: ret void ; entry: @@ -281,15 +286,16 @@ end: ; Check that fpmath information is preserved. define <4 x float> @f6(<4 x float> %x) { -; CHECK-LABEL: @f6( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0 -; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9 +; CHECK-LABEL: define <4 x float> @f6( +; CHECK-SAME: <4 x float> [[X:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath [[META9:![0-9]+]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1 -; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9 +; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath [[META9]] ; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2 -; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9 +; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath [[META9]] ; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3 -; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9 +; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath [[META9]] ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0 ; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2 @@ -303,11 +309,12 @@ define <4 x float> @f6(<4 x float> %x) { ; Check that random metadata isn't kept. 
define void @f7(ptr %src, ptr %dst) { -; CHECK-LABEL: @f7( -; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1 +; CHECK-LABEL: define void @f7( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2 @@ -332,17 +339,18 @@ define void @f7(ptr %src, ptr %dst) { ; Test GEP with vectors. define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0, -; CHECK-LABEL: @f8( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f8( +; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR0:%.*]], <4 x i32> [[I0:%.*]], ptr [[OTHER:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0 +; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 0 ; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2 ; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3 -; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1 +; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0]], i64 1 ; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100 -; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr 
[[OTHER:%.*]], i32 [[I0_I1]] +; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER]], i32 [[I0_I1]] ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]] ; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32 @@ -362,11 +370,12 @@ define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0, ; Test the handling of unaligned loads. define void @f9(ptr %dest, ptr %src) { -; CHECK-LABEL: @f9( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f9( +; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4 +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 4 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2 @@ -386,11 +395,12 @@ define void @f9(ptr %dest, ptr %src) { ; ...and again with subelement alignment. 
define void @f10(ptr %dest, ptr %src) { -; CHECK-LABEL: @f10( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f10( +; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1 +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 1 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2 @@ -410,8 +420,9 @@ define void @f10(ptr %dest, ptr %src) { ; Test that sub-byte loads aren't scalarized. define void @f11(ptr %dest, ptr %src0) { -; CHECK-LABEL: @f11( -; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1 +; CHECK-LABEL: define void @f11( +; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC0:%.*]]) { +; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0]], i32 1 ; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4 ; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0 ; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1 @@ -542,7 +553,7 @@ define void @f11(ptr %dest, ptr %src0) { ; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29 ; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30 ; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31 -; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST]], align 4 ; CHECK-NEXT: ret void ; %src1 = getelementptr <32 x i1>, ptr %src0, i32 1 @@ -555,12 +566,13 @@ define void 
@f11(ptr %dest, ptr %src0) { ; Test vector GEPs with more than one index. define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i, -; CHECK-LABEL: @f13( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f13( +; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR:%.*]], <4 x i32> [[I:%.*]], ptr [[OTHER:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR]], i64 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I]], i64 0 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]] ; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1 @@ -587,19 +599,20 @@ define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i, ; Test combinations of vector and non-vector PHIs. 
define <4 x float> @f14(<4 x float> %acc, i32 %count) { -; CHECK-LABEL: @f14( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0 +; CHECK-LABEL: define <4 x float> @f14( +; CHECK-SAME: <4 x float> [[ACC:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC]], i64 0 ; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1 ; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2 ; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], %[[ENTRY]] ], [ [[NEXT_ACC_I0:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], %[[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], %[[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], %[[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXT_COUNT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0 ; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float 
[[THIS_ACC_I1]], i64 1 ; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2 @@ -619,8 +632,8 @@ define <4 x float> @f14(<4 x float> %acc, i32 %count) { ; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3 ; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]] ; entry: @@ -641,13 +654,14 @@ exit: ; Test unary operator scalarization. define void @f15(<4 x float> %init, ptr %base, i32 %count) { -; CHECK-LABEL: @f15( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-LABEL: define void @f15( +; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]] ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4 @@ -681,8 +695,8 @@ define void @f15(<4 x float> %init, ptr %base, i32 %count) { ; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8 ; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 -; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label 
[[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -712,9 +726,10 @@ exit: ; Check that IR flags are preserved. define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { -; CHECK-LABEL: @f16( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 +; CHECK-LABEL: define <2 x i32> @f16( +; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) { +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 @@ -727,9 +742,10 @@ define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { ret <2 x i32> %res } define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { -; CHECK-LABEL: @f17( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 +; CHECK-LABEL: define <2 x i32> @f17( +; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) { +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]] ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 @@ -742,9 +758,10 @@ define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { ret <2 x i32> %res } define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @f18( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 +; CHECK-LABEL: define <2 x float> @f18( +; CHECK-SAME: <2 x float> 
[[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 @@ -757,8 +774,9 @@ define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { ret <2 x float> %res } define <2 x float> @f19(<2 x float> %x) { -; CHECK-LABEL: @f19( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-LABEL: define <2 x float> @f19( +; CHECK-SAME: <2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]] @@ -770,9 +788,10 @@ define <2 x float> @f19(<2 x float> %x) { ret <2 x float> %res } define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @f20( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 +; CHECK-LABEL: define <2 x i1> @f20( +; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 @@ -786,8 +805,9 @@ define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { } declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) define <2 x float> @f21(<2 x float> %x) { -; CHECK-LABEL: @f21( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-LABEL: define 
<2 x float> @f21( +; CHECK-SAME: <2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]]) ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]]) @@ -800,10 +820,11 @@ define <2 x float> @f21(<2 x float> %x) { } declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { -; CHECK-LABEL: @f22( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 -; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0 +; CHECK-LABEL: define <2 x float> @f22( +; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0 +; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 @@ -819,10 +840,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; See https://reviews.llvm.org/D83101#2133062 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { -; CHECK-LABEL: @f23_crash( -; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0 +; CHECK-LABEL: define <2 x i32> @f23_crash( +; CHECK-SAME: <2 x i32> [[SRCVEC:%.*]], i32 [[V1:%.*]]) { +; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC]], i64 0 ; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0 -; CHECK-NEXT: 
[[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[T1]] ; %v0 = extractelement <2 x i32> %srcvec, i32 0 @@ -838,3 +860,15 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { !4 = !{ float 4.0 } !5 = !{ i64 0, i64 8, null } !13 = distinct !{} +;. +; CHECK: [[SET1_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"set1", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"root"} +; CHECK: [[SET2_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"set2", [[META2]]} +; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 8, null} +; CHECK: [[ACC_GRP6]] = distinct !{} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]} +; CHECK: [[META8]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP6]]} +; CHECK: [[META9]] = !{float 4.000000e+00} +;. diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll index 190e8a089a5f6..82337c927a9ed 100644 --- a/llvm/test/Transforms/Scalarizer/basic.ll +++ b/llvm/test/Transforms/Scalarizer/basic.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt %s -passes='function(scalarizer,dce)' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -6,21 +6,22 @@ declare <4 x float> @ext(<4 x float>) @g = global <4 x float> zeroinitializer define void @f1(<4 x float> %init, ptr %base, i32 %count) { -; CHECK-LABEL: @f1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0 +; CHECK-LABEL: define void @f1( +; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: 
+; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT]], i64 0 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1 ; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]] ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4 @@ -54,8 +55,8 @@ define void @f1(<4 x float> %init, ptr %base, i32 %count) { ; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8 ; CHECK-NEXT: store float [[SEL_I3]], ptr 
[[PTR_I3]], align 4 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 -; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -92,21 +93,22 @@ exit: } define void @f2(<4 x i32> %init, ptr %base, i32 %count) { -; CHECK-LABEL: @f2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0 +; CHECK-LABEL: define void @f2( +; CHECK-SAME: <4 x i32> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT]], i64 0 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1 ; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], %[[ENTRY]] ], [ [[SEL_I0:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], %[[ENTRY]] ], [ [[SEL_I1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], %[[ENTRY]] ], [ [[SEL_I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], %[[ENTRY]] ], [ [[SEL_I3:%.*]], %[[LOOP]] ] ; 
CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE]], i32 [[I]] ; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1 @@ -139,8 +141,8 @@ define void @f2(<4 x i32> %init, ptr %base, i32 %count) { ; CHECK-NEXT: store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2 ; CHECK-NEXT: store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 -; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -172,25 +174,26 @@ exit: ; Check that !tbaa information is preserved. define void @f3(ptr %src, ptr %dst) { -; CHECK-LABEL: @f3( -; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1 +; CHECK-LABEL: define void @f3( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa [[SET1_TBAA0:![0-9]+]] ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1 -; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[SET1_TBAA0]] ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2 -; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa 
[[SET1_TBAA0]] ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3 -; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[SET1_TBAA0]] ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] -; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[SET2_TBAA3:![0-9]+]] +; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[SET2_TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[SET2_TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[SET2_TBAA3]] ; CHECK-NEXT: ret void ; %val = load <4 x i32> , ptr %src, !tbaa !1 @@ -201,11 +204,12 @@ define void @f3(ptr %src, ptr %dst) { ; Check that !tbaa.struct information is preserved. 
define void @f4(ptr %src, ptr %dst) { -; CHECK-LABEL: @f4( -; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1 +; CHECK-LABEL: define void @f4( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]] ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2 @@ -230,16 +234,17 @@ define void @f4(ptr %src, ptr %dst) { ; Check that llvm.access.group information is preserved. define void @f5(i32 %count, ptr %src, ptr %dst) { -; CHECK-LABEL: @f5( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]] +; CHECK-LABEL: define void @f5( +; CHECK-SAME: i32 [[COUNT:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT_INDEX:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1 ; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2 ; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3 -; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 
[[INDEX]] +; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1 ; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2 ; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3 @@ -256,9 +261,9 @@ define void @f5(i32 %count, ptr %src, ptr %dst) { ; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]] ; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]] ; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1 -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]] -; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: end: +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT]] +; CHECK-NEXT: br i1 [[CONTINUE]], label %[[LOOP]], label %[[END:.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[END]]: ; CHECK-NEXT: ret void ; entry: @@ -281,8 +286,9 @@ end: ; Check that fpmath information is preserved. define <4 x float> @f6(<4 x float> %x) { -; CHECK-LABEL: @f6( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0 +; CHECK-LABEL: define <4 x float> @f6( +; CHECK-SAME: <4 x float> [[X:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath [[META9:![0-9]+]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath [[META9]] @@ -303,11 +309,12 @@ define <4 x float> @f6(<4 x float> %x) { ; Check that random metadata isn't kept. 
define void @f7(ptr %src, ptr %dst) { -; CHECK-LABEL: @f7( -; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1 +; CHECK-LABEL: define void @f7( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC]], align 16 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2 @@ -332,17 +339,18 @@ define void @f7(ptr %src, ptr %dst) { ; Test GEP with vectors. define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0, -; CHECK-LABEL: @f8( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f8( +; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR0:%.*]], <4 x i32> [[I0:%.*]], ptr [[OTHER:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0 +; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 0 ; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2 ; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3 -; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1 +; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0]], i64 1 ; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100 -; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr 
[[OTHER:%.*]], i32 [[I0_I1]] +; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER]], i32 [[I0_I1]] ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]] ; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32 @@ -362,11 +370,12 @@ define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0, ; Test the handling of unaligned loads. define void @f9(ptr %dest, ptr %src) { -; CHECK-LABEL: @f9( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f9( +; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4 +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 4 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2 @@ -386,11 +395,12 @@ define void @f9(ptr %dest, ptr %src) { ; ...and again with subelement alignment. 
define void @f10(ptr %dest, ptr %src) { -; CHECK-LABEL: @f10( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f10( +; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1 +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC]], align 1 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2 @@ -410,8 +420,9 @@ define void @f10(ptr %dest, ptr %src) { ; Test that sub-byte loads aren't scalarized. define void @f11(ptr %dest, ptr %src0) { -; CHECK-LABEL: @f11( -; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1 +; CHECK-LABEL: define void @f11( +; CHECK-SAME: ptr [[DEST:%.*]], ptr [[SRC0:%.*]]) { +; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0]], i32 1 ; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4 ; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0 ; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1 @@ -542,7 +553,7 @@ define void @f11(ptr %dest, ptr %src0) { ; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29 ; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30 ; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31 -; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST]], align 4 ; CHECK-NEXT: ret void ; %src1 = getelementptr <32 x i1>, ptr %src0, i32 1 @@ -555,12 +566,13 @@ define void 
@f11(ptr %dest, ptr %src0) { ; Test vector GEPs with more than one index. define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i, -; CHECK-LABEL: @f13( -; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 +; CHECK-LABEL: define void @f13( +; CHECK-SAME: ptr [[DEST:%.*]], <4 x ptr> [[PTR:%.*]], <4 x i32> [[I:%.*]], ptr [[OTHER:%.*]]) { +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR]], i64 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I]], i64 0 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]] ; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1 @@ -587,19 +599,20 @@ define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i, ; Test combinations of vector and non-vector PHIs. 
define <4 x float> @f14(<4 x float> %acc, i32 %count) { -; CHECK-LABEL: @f14( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0 +; CHECK-LABEL: define <4 x float> @f14( +; CHECK-SAME: <4 x float> [[ACC:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC]], i64 0 ; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1 ; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2 ; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], %[[ENTRY]] ], [ [[NEXT_ACC_I0:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], %[[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], %[[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], %[[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXT_COUNT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0 ; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float 
[[THIS_ACC_I1]], i64 1 ; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2 @@ -619,8 +632,8 @@ define <4 x float> @f14(<4 x float> %acc, i32 %count) { ; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3 ; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]] ; entry: @@ -641,13 +654,14 @@ exit: ; Test unary operator scalarization. define void @f15(<4 x float> %init, ptr %base, i32 %count) { -; CHECK-LABEL: @f15( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-LABEL: define void @f15( +; CHECK-SAME: <4 x float> [[INIT:%.*]], ptr [[BASE:%.*]], i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT]], %[[ENTRY]] ], [ [[NEXTI:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE]], i32 [[I]] ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4 @@ -681,8 +695,8 @@ define void @f15(<4 x float> %init, ptr %base, i32 %count) { ; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8 ; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 -; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label 
[[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TEST]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: @@ -712,9 +726,10 @@ exit: ; Check that IR flags are preserved. define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { -; CHECK-LABEL: @f16( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 +; CHECK-LABEL: define <2 x i32> @f16( +; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) { +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 @@ -727,9 +742,10 @@ define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { ret <2 x i32> %res } define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { -; CHECK-LABEL: @f17( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 +; CHECK-LABEL: define <2 x i32> @f17( +; CHECK-SAME: <2 x i32> [[I:%.*]], <2 x i32> [[J:%.*]]) { +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]] ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 @@ -742,9 +758,10 @@ define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { ret <2 x i32> %res } define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @f18( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 +; CHECK-LABEL: define <2 x float> @f18( +; CHECK-SAME: <2 x float> 
[[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 @@ -757,8 +774,9 @@ define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { ret <2 x float> %res } define <2 x float> @f19(<2 x float> %x) { -; CHECK-LABEL: @f19( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-LABEL: define <2 x float> @f19( +; CHECK-SAME: <2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]] @@ -770,9 +788,10 @@ define <2 x float> @f19(<2 x float> %x) { ret <2 x float> %res } define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @f20( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 +; CHECK-LABEL: define <2 x i1> @f20( +; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]] ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 @@ -786,8 +805,9 @@ define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { } declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) define <2 x float> @f21(<2 x float> %x) { -; CHECK-LABEL: @f21( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-LABEL: define 
<2 x float> @f21( +; CHECK-SAME: <2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]]) ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]]) @@ -800,10 +820,11 @@ define <2 x float> @f21(<2 x float> %x) { } declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { -; CHECK-LABEL: @f22( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 -; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0 +; CHECK-LABEL: define <2 x float> @f22( +; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y]], i64 0 +; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 @@ -819,10 +840,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; See https://reviews.llvm.org/D83101#2133062 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { -; CHECK-LABEL: @f23_crash( -; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0 +; CHECK-LABEL: define <2 x i32> @f23_crash( +; CHECK-SAME: <2 x i32> [[SRCVEC:%.*]], i32 [[V1:%.*]]) { +; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC]], i64 0 ; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0 -; CHECK-NEXT: 
[[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[T1]] ; %v0 = extractelement <2 x i32> %srcvec, i32 0 @@ -832,8 +854,9 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { } define <2 x i32> @f24(<2 x i32> %src) { -; CHECK-LABEL: @f24( -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0 +; CHECK-LABEL: define <2 x i32> @f24( +; CHECK-SAME: <2 x i32> [[SRC:%.*]]) { +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC]], i64 0 ; CHECK-NEXT: [[FRZ_I0:%.*]] = freeze i32 [[SRC_I0]] ; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1 ; CHECK-NEXT: [[FRZ_I1:%.*]] = freeze i32 [[SRC_I1]] @@ -846,8 +869,9 @@ define <2 x i32> @f24(<2 x i32> %src) { } define <2 x float> @f25(<2 x float> %src) { -; CHECK-LABEL: @f25( -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC:%.*]], i64 0 +; CHECK-LABEL: define <2 x float> @f25( +; CHECK-SAME: <2 x float> [[SRC:%.*]]) { +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC]], i64 0 ; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[SRC_I0]], [[SRC_I0]] ; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x float> [[SRC]], i64 1 ; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[SRC_I1]], [[SRC_I1]] @@ -866,8 +890,9 @@ define <2 x float> @f25(<2 x float> %src) { } define <2 x i8> @test_copy_trunc_flags(<2 x i32> %src) { -; CHECK-LABEL: @test_copy_trunc_flags( -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0 +; CHECK-LABEL: define <2 x i8> @test_copy_trunc_flags( +; CHECK-SAME: <2 x i32> [[SRC:%.*]]) { +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC]], i64 0 ; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc nuw nsw i32 [[SRC_I0]] to i8 ; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1 ; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc nuw nsw i32 [[SRC_I1]] to i8 @@ -886,3 +911,15 @@ define <2 x 
i8> @test_copy_trunc_flags(<2 x i32> %src) { !4 = !{ float 4.0 } !5 = !{ i64 0, i64 8, null } !13 = distinct !{} +;. +; CHECK: [[SET1_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"set1", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"root"} +; CHECK: [[SET2_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"set2", [[META2]]} +; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 8, null} +; CHECK: [[ACC_GRP6]] = distinct !{} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]} +; CHECK: [[META8]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP6]]} +; CHECK: [[META9]] = !{float 4.000000e+00} +;. From 5fd3aad54c1be20c96fe407348604b4657ce53ab Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Fri, 12 Sep 2025 11:31:27 -0700 Subject: [PATCH 167/734] [DirectX] Updating Root Signature YAML representation to use Enums instead of uint (#154827) This PR is updating Root Signature YAML to use enums, this is a required change to remove the use of to_underlying from DirectXContainer binary file. 
Closes: [#150676](https://github.com/llvm/llvm-project/issues/150676) --- .../include/llvm/ObjectYAML/DXContainerYAML.h | 31 +++-- llvm/include/llvm/Support/DXILABI.h | 1 - llvm/lib/ObjectYAML/DXContainerEmitter.cpp | 37 ++---- llvm/lib/ObjectYAML/DXContainerYAML.cpp | 113 +++++++++++++++--- ...escriptorTable-AllValidFlagCombinations.ll | 32 ++--- ...criptorTable-AllValidFlagCombinationsV1.ll | 8 +- .../RootSignature-DescriptorTable.ll | 12 +- .../RootSignature-RootConstants.ll | 8 +- .../RootSignature-RootDescriptor.ll | 8 +- .../RootSignature-RootDescriptor_V1.ll | 8 +- .../RootSignature-StaticSamplers.ll | 14 +-- .../RootSignature-Descriptor1.0.yaml | 8 +- .../RootSignature-Descriptor1.1.yaml | 8 +- .../RootSignature-DescriptorTable1.0.yaml | 12 +- .../RootSignature-DescriptorTable1.1.yaml | 12 +- ...ignature-Invalid-StaticSamplersOffset.yaml | 4 +- .../RootSignature-MultipleParameters.yaml | 36 +++--- .../RootSignature-OptionalOffsets.yaml | 14 +-- .../RootSignature-StaticSamplerOffset1.0.yaml | 14 +-- .../RootSignature-StaticSamplerOffset1.1.yaml | 14 +-- ...RootSignature-StaticSamplers-Defaults.yaml | 16 +-- .../RootSignature-StaticSamplers.yaml | 28 ++--- .../ObjectYAML/DXContainerYAMLTest.cpp | 42 +++---- 23 files changed, 274 insertions(+), 206 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h index 359b27761cea3..62bfee7693db1 100644 --- a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h +++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h @@ -92,7 +92,7 @@ struct RootDescriptorYaml { }; struct DescriptorRangeYaml { - uint32_t RangeType; + dxil::ResourceClass RangeType; uint32_t NumDescriptors; uint32_t BaseShaderRegister; uint32_t RegisterSpace; @@ -111,12 +111,12 @@ struct DescriptorTableYaml { }; struct RootParameterHeaderYaml { - uint32_t Type; - uint32_t Visibility; + dxbc::RootParameterType Type; + dxbc::ShaderVisibility Visibility; uint32_t Offset; RootParameterHeaderYaml(){}; - 
RootParameterHeaderYaml(uint32_t T) : Type(T) {} + RootParameterHeaderYaml(dxbc::RootParameterType T) : Type(T) {} }; struct RootParameterLocationYaml { @@ -165,21 +165,19 @@ struct RootParameterYamlDesc { }; struct StaticSamplerYamlDesc { - uint32_t Filter = llvm::to_underlying(dxbc::SamplerFilter::Anisotropic); - uint32_t AddressU = llvm::to_underlying(dxbc::TextureAddressMode::Wrap); - uint32_t AddressV = llvm::to_underlying(dxbc::TextureAddressMode::Wrap); - uint32_t AddressW = llvm::to_underlying(dxbc::TextureAddressMode::Wrap); + dxbc::SamplerFilter Filter = dxbc::SamplerFilter::Anisotropic; + dxbc::TextureAddressMode AddressU = dxbc::TextureAddressMode::Wrap; + dxbc::TextureAddressMode AddressV = dxbc::TextureAddressMode::Wrap; + dxbc::TextureAddressMode AddressW = dxbc::TextureAddressMode::Wrap; float MipLODBias = 0.f; uint32_t MaxAnisotropy = 16u; - uint32_t ComparisonFunc = - llvm::to_underlying(dxbc::ComparisonFunc::LessEqual); - uint32_t BorderColor = - llvm::to_underlying(dxbc::StaticBorderColor::OpaqueWhite); + dxbc::ComparisonFunc ComparisonFunc = dxbc::ComparisonFunc::LessEqual; + dxbc::StaticBorderColor BorderColor = dxbc::StaticBorderColor::OpaqueWhite; float MinLOD = 0.f; float MaxLOD = std::numeric_limits::max(); uint32_t ShaderRegister; uint32_t RegisterSpace; - uint32_t ShaderVisibility; + dxbc::ShaderVisibility ShaderVisibility; }; struct RootSignatureYamlDesc { @@ -321,6 +319,13 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::PSV::ResourceKind) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::D3DSystemValue) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SigComponentType) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SigMinPrecision) +LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::RootParameterType) +LLVM_YAML_DECLARE_ENUM_TRAITS(dxil::ResourceClass) +LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SamplerFilter) +LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::StaticBorderColor) +LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::TextureAddressMode) 
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::ShaderVisibility) +LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::ComparisonFunc) namespace llvm { diff --git a/llvm/include/llvm/Support/DXILABI.h b/llvm/include/llvm/Support/DXILABI.h index 307a1d1d43f5c..e6600c3406df5 100644 --- a/llvm/include/llvm/Support/DXILABI.h +++ b/llvm/include/llvm/Support/DXILABI.h @@ -102,7 +102,6 @@ const unsigned MinWaveSize = 4; const unsigned MaxWaveSize = 128; LLVM_ABI StringRef getResourceClassName(ResourceClass RC); - } // namespace dxil } // namespace llvm diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp index 73dfa9899d613..910383816f43b 100644 --- a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp +++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp @@ -274,13 +274,8 @@ Error DXContainerWriter::writeParts(raw_ostream &OS) { for (DXContainerYAML::RootParameterLocationYaml &L : P.RootSignature->Parameters.Locations) { - assert(dxbc::isValidParameterType(L.Header.Type) && - "invalid DXContainer YAML"); - assert(dxbc::isValidShaderVisibility(L.Header.Visibility) && - "invalid DXContainer YAML"); - dxbc::RootParameterType Type = dxbc::RootParameterType(L.Header.Type); - dxbc::ShaderVisibility Visibility = - dxbc::ShaderVisibility(L.Header.Visibility); + const dxbc::RootParameterType Type = L.Header.Type; + const dxbc::ShaderVisibility Visibility = L.Header.Visibility; switch (Type) { case dxbc::RootParameterType::Constants32Bit: { @@ -313,10 +308,8 @@ Error DXContainerWriter::writeParts(raw_ostream &OS) { P.RootSignature->Parameters.getOrInsertTable(L); mcdxbc::DescriptorTable Table; for (const auto &R : TableYaml.Ranges) { - assert(dxbc::isValidRangeType(R.RangeType) && - "Invalid Descriptor Range Type"); mcdxbc::DescriptorRange Range; - Range.RangeType = dxil::ResourceClass(R.RangeType); + Range.RangeType = R.RangeType; Range.NumDescriptors = R.NumDescriptors; Range.BaseShaderRegister = R.BaseShaderRegister; Range.RegisterSpace = R.RegisterSpace; @@ 
-335,30 +328,20 @@ Error DXContainerWriter::writeParts(raw_ostream &OS) { } for (const auto &Param : P.RootSignature->samplers()) { - assert(dxbc::isValidSamplerFilter(Param.Filter) && - dxbc::isValidAddress(Param.AddressU) && - dxbc::isValidAddress(Param.AddressV) && - dxbc::isValidAddress(Param.AddressW) && - dxbc::isValidComparisonFunc(Param.ComparisonFunc) && - dxbc::isValidBorderColor(Param.BorderColor) && - dxbc::isValidShaderVisibility(Param.ShaderVisibility) && - "Invalid enum value in static sampler"); - mcdxbc::StaticSampler NewSampler; - NewSampler.Filter = dxbc::SamplerFilter(Param.Filter); - NewSampler.AddressU = dxbc::TextureAddressMode(Param.AddressU); - NewSampler.AddressV = dxbc::TextureAddressMode(Param.AddressV); - NewSampler.AddressW = dxbc::TextureAddressMode(Param.AddressW); + NewSampler.Filter = Param.Filter; + NewSampler.AddressU = Param.AddressU; + NewSampler.AddressV = Param.AddressV; + NewSampler.AddressW = Param.AddressW; NewSampler.MipLODBias = Param.MipLODBias; NewSampler.MaxAnisotropy = Param.MaxAnisotropy; - NewSampler.ComparisonFunc = dxbc::ComparisonFunc(Param.ComparisonFunc); - NewSampler.BorderColor = dxbc::StaticBorderColor(Param.BorderColor); + NewSampler.ComparisonFunc = Param.ComparisonFunc; + NewSampler.BorderColor = Param.BorderColor; NewSampler.MinLOD = Param.MinLOD; NewSampler.MaxLOD = Param.MaxLOD; NewSampler.ShaderRegister = Param.ShaderRegister; NewSampler.RegisterSpace = Param.RegisterSpace; - NewSampler.ShaderVisibility = - dxbc::ShaderVisibility(Param.ShaderVisibility); + NewSampler.ShaderVisibility = Param.ShaderVisibility; RS.StaticSamplers.push_back(NewSampler); } diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp index 32b502ed4e21f..22674b1ceb734 100644 --- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp +++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp @@ -60,7 +60,10 @@ readDescriptorRanges(DXContainerYAML::RootParameterHeaderYaml &Header, NewR.NumDescriptors = 
R.NumDescriptors; NewR.BaseShaderRegister = R.BaseShaderRegister; NewR.RegisterSpace = R.RegisterSpace; - NewR.RangeType = R.RangeType; + if (!dxbc::isValidRangeType(R.RangeType)) + return createStringError(std::errc::invalid_argument, + "Invalid value for descriptor range type"); + NewR.RangeType = dxil::ResourceClass(R.RangeType); if constexpr (std::is_same_v) { // Set all flag fields for v2 #define DESCRIPTOR_RANGE_FLAG(Num, Enum, Flag) \ @@ -94,15 +97,14 @@ DXContainerYAML::RootSignatureYamlDesc::create( return createStringError(std::errc::invalid_argument, "Invalid value for parameter type"); - RootParameterHeaderYaml Header(PH.ParameterType); + RootParameterHeaderYaml Header(dxbc::RootParameterType(PH.ParameterType)); Header.Offset = PH.ParameterOffset; - Header.Type = PH.ParameterType; if (!dxbc::isValidShaderVisibility(PH.ShaderVisibility)) return createStringError(std::errc::invalid_argument, "Invalid value for shader visibility"); - Header.Visibility = PH.ShaderVisibility; + Header.Visibility = dxbc::ShaderVisibility(PH.ShaderVisibility); llvm::Expected ParamViewOrErr = Data.getParameter(PH); @@ -162,20 +164,50 @@ DXContainerYAML::RootSignatureYamlDesc::create( } for (const auto &S : Data.samplers()) { + if (!dxbc::isValidSamplerFilter(S.Filter)) + return createStringError(std::errc::invalid_argument, + "Invalid value for static sampler filter"); + + if (!dxbc::isValidAddress(S.AddressU)) + return createStringError(std::errc::invalid_argument, + "Invalid value for static sampler AddressU"); + + if (!dxbc::isValidAddress(S.AddressV)) + return createStringError(std::errc::invalid_argument, + "Invalid value for static sampler AddressV"); + + if (!dxbc::isValidAddress(S.AddressW)) + return createStringError(std::errc::invalid_argument, + "Invalid value for static sampler AddressW"); + + if (!dxbc::isValidComparisonFunc(S.ComparisonFunc)) + return createStringError( + std::errc::invalid_argument, + "Invalid value for static sampler ComparisonFunc"); + + if 
(!dxbc::isValidBorderColor(S.BorderColor)) + return createStringError(std::errc::invalid_argument, + "Invalid value for static sampler BorderColor"); + + if (!dxbc::isValidShaderVisibility(S.ShaderVisibility)) + return createStringError( + std::errc::invalid_argument, + "Invalid value for static sampler ShaderVisibility"); + StaticSamplerYamlDesc NewS; - NewS.Filter = S.Filter; - NewS.AddressU = S.AddressU; - NewS.AddressV = S.AddressV; - NewS.AddressW = S.AddressW; + NewS.Filter = dxbc::SamplerFilter(S.Filter); + NewS.AddressU = dxbc::TextureAddressMode(S.AddressU); + NewS.AddressV = dxbc::TextureAddressMode(S.AddressV); + NewS.AddressW = dxbc::TextureAddressMode(S.AddressW); NewS.MipLODBias = S.MipLODBias; NewS.MaxAnisotropy = S.MaxAnisotropy; - NewS.ComparisonFunc = S.ComparisonFunc; - NewS.BorderColor = S.BorderColor; + NewS.ComparisonFunc = dxbc::ComparisonFunc(S.ComparisonFunc); + NewS.BorderColor = dxbc::StaticBorderColor(S.BorderColor); NewS.MinLOD = S.MinLOD; NewS.MaxLOD = S.MaxLOD; NewS.ShaderRegister = S.ShaderRegister; NewS.RegisterSpace = S.RegisterSpace; - NewS.ShaderVisibility = S.ShaderVisibility; + NewS.ShaderVisibility = dxbc::ShaderVisibility(S.ShaderVisibility); RootSigDesc.StaticSamplers.push_back(NewS); } @@ -425,21 +457,21 @@ void MappingContextTraits::enumeration( IO.enumCase(Value, E.Name.str().c_str(), E.Value); } +void ScalarEnumerationTraits::enumeration( + IO &IO, dxbc::RootParameterType &Value) { + for (const auto &E : dxbc::getRootParameterTypes()) + IO.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, dxil::ResourceClass &Value) { + const EnumEntry ResourceClasses[] = { + {"CBuffer", dxil::ResourceClass::CBuffer}, + {"SRV", dxil::ResourceClass::SRV}, + {"UAV", dxil::ResourceClass::UAV}, + {"Sampler", dxil::ResourceClass::Sampler}, + }; + + for (const auto &E : ResourceClasses) + IO.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarEnumerationTraits::enumeration( 
+ IO &IO, dxbc::SamplerFilter &Value) { + for (const auto &E : dxbc::getSamplerFilters()) + IO.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, dxbc::StaticBorderColor &Value) { + for (const auto &E : dxbc::getStaticBorderColors()) + IO.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, dxbc::TextureAddressMode &Value) { + for (const auto &E : dxbc::getTextureAddressModes()) + IO.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, dxbc::ShaderVisibility &Value) { + for (const auto &E : dxbc::getShaderVisibility()) + IO.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, dxbc::ComparisonFunc &Value) { + for (const auto &E : dxbc::getComparisonFuncs()) + IO.enumCase(Value, E.Name.str().c_str(), E.Value); +} + } // namespace yaml void DXContainerYAML::PSVInfo::mapInfoForVersion(yaml::IO &IO) { diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll index 1bc9b85935819..d6cb05b5d0dd9 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinations.ll @@ -61,94 +61,94 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } ;DXC-NEXT: NumStaticSamplers: 0 ;DXC-NEXT: StaticSamplersOffset: 380 ;DXC-NEXT: Parameters: -;DXC-NEXT: - ParameterType: 0 -;DXC-NEXT: ShaderVisibility: 0 +;DXC-NEXT: - ParameterType: DescriptorTable +;DXC-NEXT: ShaderVisibility: All ;DXC-NEXT: Table: ;DXC-NEXT: NumRanges: 14 ;DXC-NEXT: RangesOffset: 44 ;DXC-NEXT: Ranges: -;DXC-NEXT: - RangeType: 3 +;DXC-NEXT: - RangeType: Sampler ;DXC-NEXT: NumDescriptors: 1 
;DXC-NEXT: BaseShaderRegister: 0 ;DXC-NEXT: RegisterSpace: 1 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -;DXC-NEXT: - RangeType: 3 +;DXC-NEXT: - RangeType: Sampler ;DXC-NEXT: NumDescriptors: 1 ;DXC-NEXT: BaseShaderRegister: 0 ;DXC-NEXT: RegisterSpace: 3 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 ;DXC-NEXT: DESCRIPTORS_VOLATILE: true -;DXC-NEXT: - RangeType: 3 +;DXC-NEXT: - RangeType: Sampler ;DXC-NEXT: NumDescriptors: 1 ;DXC-NEXT: BaseShaderRegister: 0 ;DXC-NEXT: RegisterSpace: 4 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 ;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true -;DXC-NEXT: - RangeType: 0 +;DXC-NEXT: - RangeType: SRV ;DXC-NEXT: NumDescriptors: 1 ;DXC-NEXT: BaseShaderRegister: 0 ;DXC-NEXT: RegisterSpace: 5 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 ;DXC-NEXT: DESCRIPTORS_VOLATILE: true -;DXC-NEXT: - RangeType: 1 +;DXC-NEXT: - RangeType: UAV ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 6 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DESCRIPTORS_VOLATILE: true -;DXC-NEXT: - RangeType: 2 +;DXC-NEXT: - RangeType: CBuffer ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 7 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DATA_VOLATILE: true -;DXC-NEXT: - RangeType: 0 +;DXC-NEXT: - RangeType: SRV ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 8 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DATA_STATIC: true -;DXC-NEXT: - RangeType: 1 +;DXC-NEXT: - RangeType: UAV ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 9 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true -;DXC-NEXT: - RangeType: 2 +;DXC-NEXT: - RangeType: CBuffer ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 10 ;DXC-NEXT: 
OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DESCRIPTORS_VOLATILE: true ;DXC-NEXT: DATA_VOLATILE: true -;DXC-NEXT: - RangeType: 0 +;DXC-NEXT: - RangeType: SRV ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 11 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DESCRIPTORS_VOLATILE: true ;DXC-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true -;DXC-NEXT: - RangeType: 1 +;DXC-NEXT: - RangeType: UAV ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 12 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true -;DXC-NEXT: - RangeType: 2 +;DXC-NEXT: - RangeType: CBuffer ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 13 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DATA_VOLATILE: true ;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true -;DXC-NEXT: - RangeType: 0 +;DXC-NEXT: - RangeType: SRV ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 14 ;DXC-NEXT: OffsetInDescriptorsFromTableStart: 5 ;DXC-NEXT: DATA_STATIC: true ;DXC-NEXT: DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS: true -;DXC-NEXT: - RangeType: 1 +;DXC-NEXT: - RangeType: UAV ;DXC-NEXT: NumDescriptors: 5 ;DXC-NEXT: BaseShaderRegister: 1 ;DXC-NEXT: RegisterSpace: 15 diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll index fec6c4c959642..c65eab5f4aa5f 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-AllValidFlagCombinationsV1.ll @@ -26,18 +26,18 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } ; DXC-NEXT: NumStaticSamplers: 0 ; DXC-NEXT: 
StaticSamplersOffset: 84 ; DXC-NEXT: Parameters: -; DXC-NEXT: - ParameterType: 0 -; DXC-NEXT: ShaderVisibility: 0 +; DXC-NEXT: - ParameterType: DescriptorTable +; DXC-NEXT: ShaderVisibility: All ; DXC-NEXT: Table: ; DXC-NEXT: NumRanges: 2 ; DXC-NEXT: RangesOffset: 44 ; DXC-NEXT: Ranges: -; DXC-NEXT: - RangeType: 3 +; DXC-NEXT: - RangeType: Sampler ; DXC-NEXT: NumDescriptors: 1 ; DXC-NEXT: BaseShaderRegister: 1 ; DXC-NEXT: RegisterSpace: 0 ; DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -; DXC-NEXT: - RangeType: 1 +; DXC-NEXT: - RangeType: UAV ; DXC-NEXT: NumDescriptors: 5 ; DXC-NEXT: BaseShaderRegister: 1 ; DXC-NEXT: RegisterSpace: 10 diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll index 4f6f0d0bd6a14..c3985503e3788 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll @@ -23,24 +23,24 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } ; DXC-NEXT: Size: 92 ; DXC-NEXT: RootSignature: ; DXC-NEXT: Version: 2 -; DXC-NEXT: NumRootParameters: 1 -; DXC-NEXT: RootParametersOffset: 24 +; DXC-NEXT: NumRootParameters: 1 +; DXC-NEXT: RootParametersOffset: 24 ; DXC-NEXT: NumStaticSamplers: 0 ; DXC-NEXT: StaticSamplersOffset: 92 ; DXC-NEXT: Parameters: -; DXC-NEXT: - ParameterType: 0 -; DXC-NEXT: ShaderVisibility: 0 +; DXC-NEXT: - ParameterType: DescriptorTable +; DXC-NEXT: ShaderVisibility: All ; DXC-NEXT: Table: ; DXC-NEXT: NumRanges: 2 ; DXC-NEXT: RangesOffset: 44 ; DXC-NEXT: Ranges: -; DXC-NEXT: - RangeType: 0 +; DXC-NEXT: - RangeType: SRV ; DXC-NEXT: NumDescriptors: 1 ; DXC-NEXT: BaseShaderRegister: 1 ; DXC-NEXT: RegisterSpace: 0 ; DXC-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 ; DXC-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true -; DXC-NEXT: - RangeType: 1 +; DXC-NEXT: - RangeType: UAV ; DXC-NEXT: NumDescriptors: 5 
; DXC-NEXT: BaseShaderRegister: 1 ; DXC-NEXT: RegisterSpace: 10 diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll index d217f396722bc..4dec4e51abcd8 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants.ll @@ -21,13 +21,13 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } ; DXC-NEXT: Size: 48 ; DXC-NEXT: RootSignature: ; DXC-NEXT: Version: 2 -; DXC-NEXT: NumRootParameters: 1 -; DXC-NEXT: RootParametersOffset: 24 +; DXC-NEXT: NumRootParameters: 1 +; DXC-NEXT: RootParametersOffset: 24 ; DXC-NEXT: NumStaticSamplers: 0 ; DXC-NEXT: StaticSamplersOffset: 48 ; DXC-NEXT: Parameters: -; DXC-NEXT: - ParameterType: 1 -; DXC-NEXT: ShaderVisibility: 0 +; DXC-NEXT: - ParameterType: Constants32Bit +; DXC-NEXT: ShaderVisibility: All ; DXC-NEXT: Constants: ; DXC-NEXT: Num32BitValues: 3 ; DXC-NEXT: RegisterSpace: 2 diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll index 54292bb651532..6f3acdae2b81f 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor.ll @@ -21,13 +21,13 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } ; DXC-NEXT: Size: 48 ; DXC-NEXT: RootSignature: ; DXC-NEXT: Version: 2 -; DXC-NEXT: NumRootParameters: 1 -; DXC-NEXT: RootParametersOffset: 24 +; DXC-NEXT: NumRootParameters: 1 +; DXC-NEXT: RootParametersOffset: 24 ; DXC-NEXT: NumStaticSamplers: 0 ; DXC-NEXT: StaticSamplersOffset: 48 ; DXC-NEXT: Parameters: -; DXC-NEXT: - ParameterType: 2 -; DXC-NEXT: ShaderVisibility: 0 +; DXC-NEXT: - ParameterType: CBV +; DXC-NEXT: ShaderVisibility: All ; DXC-NEXT: Descriptor: ; DXC-NEXT: RegisterSpace: 2 ; DXC-NEXT: 
ShaderRegister: 1 diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll index 891a03b688a82..3509360e313e3 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor_V1.ll @@ -21,13 +21,13 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } ; DXC-NEXT: Size: 44 ; DXC-NEXT: RootSignature: ; DXC-NEXT: Version: 1 -; DXC-NEXT: NumRootParameters: 1 -; DXC-NEXT: RootParametersOffset: 24 +; DXC-NEXT: NumRootParameters: 1 +; DXC-NEXT: RootParametersOffset: 24 ; DXC-NEXT: NumStaticSamplers: 0 ; DXC-NEXT: StaticSamplersOffset: 44 ; DXC-NEXT: Parameters: -; DXC-NEXT: - ParameterType: 2 -; DXC-NEXT: ShaderVisibility: 0 +; DXC-NEXT: - ParameterType: CBV +; DXC-NEXT: ShaderVisibility: All ; DXC-NEXT: Descriptor: ; DXC-NEXT: RegisterSpace: 2 ; DXC-NEXT: ShaderRegister: 1 diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll index d9ee39dbb7287..1dd470d7fb822 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers.ll @@ -27,16 +27,16 @@ attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } ; DXC-NEXT: StaticSamplersOffset: 24 ; DXC-NEXT: Parameters: [] ; DXC-NEXT: Samplers: -; DXC-NEXT: - Filter: 4 -; DXC-NEXT: AddressU: 2 -; DXC-NEXT: AddressV: 3 -; DXC-NEXT: AddressW: 5 +; DXC-NEXT: - Filter: MinPointMagLinearMipPoint +; DXC-NEXT: AddressU: Mirror +; DXC-NEXT: AddressV: Clamp +; DXC-NEXT: AddressW: MirrorOnce ; DXC-NEXT: MipLODBias: 1.425 ; DXC-NEXT: MaxAnisotropy: 9 -; DXC-NEXT: ComparisonFunc: 3 -; DXC-NEXT: BorderColor: 2 +; DXC-NEXT: ComparisonFunc: Equal +; DXC-NEXT: BorderColor: OpaqueWhite ; DXC-NEXT: MinLOD: -128 ; 
DXC-NEXT: MaxLOD: 128 ; DXC-NEXT: ShaderRegister: 42 ; DXC-NEXT: RegisterSpace: 0 -; DXC-NEXT: ShaderVisibility: 0 +; DXC-NEXT: ShaderVisibility: All diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml index 70dc35287ba91..530ed79a95ebb 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.0.yaml @@ -19,8 +19,8 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 44 Parameters: - - ParameterType: 2 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: CBV + ShaderVisibility: Domain Descriptor: ShaderRegister: 31 RegisterSpace: 32 @@ -36,8 +36,8 @@ Parts: # CHECK-NEXT: NumStaticSamplers: 0 # CHECK-NEXT: StaticSamplersOffset: 44 # CHECK-NEXT: Parameters: -# CHECK-NEXT: - ParameterType: 2 -# CHECK-NEXT: ShaderVisibility: 3 +# CHECK-NEXT: - ParameterType: CBV +# CHECK-NEXT: ShaderVisibility: Domain # CHECK-NEXT: Descriptor: # CHECK-NEXT: RegisterSpace: 32 # CHECK-NEXT: ShaderRegister: 31 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml index 33a74dbf6a3f4..2e8df2eaed7a8 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-Descriptor1.1.yaml @@ -19,8 +19,8 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 48 Parameters: - - ParameterType: 2 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: CBV + ShaderVisibility: Domain Descriptor: ShaderRegister: 31 RegisterSpace: 32 @@ -37,8 +37,8 @@ Parts: # CHECK-NEXT: NumStaticSamplers: 0 # CHECK-NEXT: StaticSamplersOffset: 48 # CHECK-NEXT: Parameters: -# CHECK-NEXT: - ParameterType: 2 -# CHECK-NEXT: ShaderVisibility: 3 +# CHECK-NEXT: - ParameterType: CBV +# CHECK-NEXT: ShaderVisibility: Domain # CHECK-NEXT: Descriptor: # CHECK-NEXT: RegisterSpace: 32 # CHECK-NEXT: 
ShaderRegister: 31 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml index b04549fde88f7..88d941f75682b 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.0.yaml @@ -20,12 +20,12 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 64 Parameters: - - ParameterType: 0 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: DescriptorTable + ShaderVisibility: Domain Table: NumRanges: 1 Ranges: - - RangeType: 0 + - RangeType: SRV NumDescriptors: -1 BaseShaderRegister: 42 RegisterSpace: 43 @@ -42,13 +42,13 @@ Parts: # CHECK-NEXT: NumStaticSamplers: 0 # CHECK-NEXT: StaticSamplersOffset: 64 # CHECK-NEXT: Parameters: -# CHECK-NEXT: - ParameterType: 0 -# CHECK-NEXT: ShaderVisibility: 3 +# CHECK-NEXT: - ParameterType: DescriptorTable +# CHECK-NEXT: ShaderVisibility: Domain # CHECK-NEXT: Table: # CHECK-NEXT: NumRanges: 1 # CHECK-NEXT: RangesOffset: 44 # CHECK-NEXT: Ranges: -# CHECK-NEXT: - RangeType: 0 +# CHECK-NEXT: - RangeType: SRV # CHECK-NEXT: NumDescriptors: -1 # CHECK-NEXT: BaseShaderRegister: 42 # CHECK-NEXT: RegisterSpace: 43 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml index d8f399010053e..c09726defe4a5 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-DescriptorTable1.1.yaml @@ -20,12 +20,12 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 68 Parameters: - - ParameterType: 0 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: DescriptorTable + ShaderVisibility: Domain Table: NumRanges: 1 Ranges: - - RangeType: 0 + - RangeType: SRV NumDescriptors: -1 BaseShaderRegister: 42 RegisterSpace: 43 @@ -43,13 +43,13 @@ Parts: # CHECK-NEXT: NumStaticSamplers: 0 # 
CHECK-NEXT: StaticSamplersOffset: 68 # CHECK-NEXT: Parameters: -# CHECK-NEXT: - ParameterType: 0 -# CHECK-NEXT: ShaderVisibility: 3 +# CHECK-NEXT: - ParameterType: DescriptorTable +# CHECK-NEXT: ShaderVisibility: Domain # CHECK-NEXT: Table: # CHECK-NEXT: NumRanges: 1 # CHECK-NEXT: RangesOffset: 44 # CHECK-NEXT: Ranges: -# CHECK-NEXT: - RangeType: 0 +# CHECK-NEXT: - RangeType: SRV # CHECK-NEXT: NumDescriptors: -1 # CHECK-NEXT: BaseShaderRegister: 42 # CHECK-NEXT: RegisterSpace: 43 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml index e805526ea7c51..1322a4ef365ad 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-Invalid-StaticSamplersOffset.yaml @@ -21,8 +21,8 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 0 Parameters: - - ParameterType: 2 - ShaderVisibility: 3 + - ParameterType: SRV + ShaderVisibility: Domain Descriptor: ShaderRegister: 31 RegisterSpace: 32 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml index 26d56536b9e44..684ada465d8fc 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-MultipleParameters.yaml @@ -19,30 +19,30 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 140 Parameters: - - ParameterType: 1 # Constants32Bit - ShaderVisibility: 2 # Hull + - ParameterType: Constants32Bit + ShaderVisibility: Hull Constants: Num32BitValues: 16 ShaderRegister: 15 RegisterSpace: 14 - - ParameterType: 1 # Constants32Bit - ShaderVisibility: 4 # Geometry + - ParameterType: Constants32Bit + ShaderVisibility: Geometry Constants: Num32BitValues: 21 ShaderRegister: 22 RegisterSpace: 23 - - ParameterType: 2 # SRV - ShaderVisibility: 3 # Domain + - 
ParameterType: SRV + ShaderVisibility: Domain Descriptor: ShaderRegister: 31 RegisterSpace: 32 DATA_STATIC_WHILE_SET_AT_EXECUTE: true - - ParameterType: 0 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: DescriptorTable + ShaderVisibility: Domain Table: NumRanges: 1 Ranges: - - RangeType: 0 + - RangeType: SRV NumDescriptors: -1 BaseShaderRegister: 42 RegisterSpace: 43 @@ -60,31 +60,31 @@ Parts: # CHECK-NEXT: NumStaticSamplers: 0 # CHECK-NEXT: StaticSamplersOffset: 140 # CHECK-NEXT: Parameters: -# CHECK-NEXT: - ParameterType: 1 -# CHECK-NEXT: ShaderVisibility: 2 +# CHECK-NEXT: - ParameterType: Constants32Bit +# CHECK-NEXT: ShaderVisibility: Hull # CHECK-NEXT: Constants: # CHECK-NEXT: Num32BitValues: 16 # CHECK-NEXT: RegisterSpace: 14 # CHECK-NEXT: ShaderRegister: 15 -# CHECK-NEXT: - ParameterType: 1 -# CHECK-NEXT: ShaderVisibility: 4 +# CHECK-NEXT: - ParameterType: Constants32Bit +# CHECK-NEXT: ShaderVisibility: Geometry # CHECK-NEXT: Constants: # CHECK-NEXT: Num32BitValues: 21 # CHECK-NEXT: RegisterSpace: 23 # CHECK-NEXT: ShaderRegister: 22 -# CHECK-NEXT: - ParameterType: 2 -# CHECK-NEXT: ShaderVisibility: 3 +# CHECK-NEXT: - ParameterType: SRV +# CHECK-NEXT: ShaderVisibility: Domain # CHECK-NEXT: Descriptor: # CHECK-NEXT: RegisterSpace: 32 # CHECK-NEXT: ShaderRegister: 31 # CHECK-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true -# CHECK-NEXT: - ParameterType: 0 -# CHECK-NEXT: ShaderVisibility: 3 +# CHECK-NEXT: - ParameterType: DescriptorTable +# CHECK-NEXT: ShaderVisibility: Domain # CHECK-NEXT: Table: # CHECK-NEXT: NumRanges: 1 # CHECK-NEXT: RangesOffset: 116 # CHECK-NEXT: Ranges: -# CHECK-NEXT: - RangeType: 0 +# CHECK-NEXT: - RangeType: SRV # CHECK-NEXT: NumDescriptors: -1 # CHECK-NEXT: BaseShaderRegister: 42 # CHECK-NEXT: RegisterSpace: 43 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml index 88d7c632968be..00bc190c0903d 100644 --- 
a/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-OptionalOffsets.yaml @@ -31,24 +31,24 @@ Parts: NumRootParameters: 3 NumStaticSamplers: 0 Parameters: - - ParameterType: 1 # RootConstants - ShaderVisibility: 0 + - ParameterType: Constants32Bit + ShaderVisibility: All Constants: Num32BitValues: 16 ShaderRegister: 15 RegisterSpace: 14 - - ParameterType: 2 # SRV - ShaderVisibility: 0 + - ParameterType: SRV + ShaderVisibility: All Descriptor: ShaderRegister: 31 RegisterSpace: 32 DATA_STATIC_WHILE_SET_AT_EXECUTE: true - - ParameterType: 0 # Descriptor Table - ShaderVisibility: 0 + - ParameterType: DescriptorTable + ShaderVisibility: All Table: NumRanges: 1 Ranges: - - RangeType: 0 # CBV + - RangeType: CBuffer NumDescriptors: -1 BaseShaderRegister: 42 RegisterSpace: 43 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml index 347d8f3be1710..eb940865e7c66 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.0.yaml @@ -33,24 +33,24 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 108 Parameters: - - ParameterType: 1 # RootConstants - ShaderVisibility: 0 + - ParameterType: Constants32Bit + ShaderVisibility: All Constants: Num32BitValues: 16 ShaderRegister: 15 RegisterSpace: 14 - - ParameterType: 2 # SRV - ShaderVisibility: 0 + - ParameterType: CBV + ShaderVisibility: All Descriptor: ShaderRegister: 31 RegisterSpace: 32 DATA_STATIC_WHILE_SET_AT_EXECUTE: true - - ParameterType: 0 # Descriptor Table - ShaderVisibility: 0 + - ParameterType: DescriptorTable + ShaderVisibility: All Table: NumRanges: 1 Ranges: - - RangeType: 0 # CBV + - RangeType: CBuffer NumDescriptors: -1 BaseShaderRegister: 42 RegisterSpace: 43 diff --git 
a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml index 8e03e1a8b29be..73e89c2dbe336 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplerOffset1.1.yaml @@ -33,24 +33,24 @@ Parts: NumStaticSamplers: 0 StaticSamplersOffset: 116 Parameters: - - ParameterType: 1 # RootConstants - ShaderVisibility: 0 + - ParameterType: Constants32Bit + ShaderVisibility: All Constants: Num32BitValues: 16 ShaderRegister: 15 RegisterSpace: 14 - - ParameterType: 2 # SRV - ShaderVisibility: 0 + - ParameterType: SRV + ShaderVisibility: All Descriptor: ShaderRegister: 31 RegisterSpace: 32 DATA_STATIC_WHILE_SET_AT_EXECUTE: true - - ParameterType: 0 # Descriptor Table - ShaderVisibility: 0 + - ParameterType: DescriptorTable + ShaderVisibility: All Table: NumRanges: 1 Ranges: - - RangeType: 0 # CBV + - RangeType: CBuffer NumDescriptors: -1 BaseShaderRegister: 42 RegisterSpace: 43 diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml index 5df7da87aafd2..a45e3b025a5c0 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers-Defaults.yaml @@ -22,7 +22,7 @@ Parts: Samplers: - ShaderRegister: 31 RegisterSpace: 32 - ShaderVisibility: 7 + ShaderVisibility: Mesh AllowInputAssemblerInputLayout: true DenyGeometryShaderRootAccess: true @@ -36,18 +36,18 @@ Parts: #CHECK-NEXT: StaticSamplersOffset: 24 #CHECK-NEXT: Parameters: [] #CHECK-NEXT: Samplers: -#CHECK-NEXT: - Filter: 85 -#CHECK-NEXT: AddressU: 1 -#CHECK-NEXT: AddressV: 1 -#CHECK-NEXT: AddressW: 1 +#CHECK-NEXT: - Filter: Anisotropic +#CHECK-NEXT: AddressU: Wrap +#CHECK-NEXT: AddressV: Wrap +#CHECK-NEXT: AddressW: Wrap #CHECK-NEXT: 
MipLODBias: 0 #CHECK-NEXT: MaxAnisotropy: 16 -#CHECK-NEXT: ComparisonFunc: 4 -#CHECK-NEXT: BorderColor: 2 +#CHECK-NEXT: ComparisonFunc: LessEqual +#CHECK-NEXT: BorderColor: OpaqueWhite #CHECK-NEXT: MinLOD: 0 #CHECK-NEXT: MaxLOD: 3.40282e+38 #CHECK-NEXT: ShaderRegister: 31 #CHECK-NEXT: RegisterSpace: 32 -#CHECK-NEXT: ShaderVisibility: 7 +#CHECK-NEXT: ShaderVisibility: Mesh #CHECK-NEXT: AllowInputAssemblerInputLayout: true #CHECK-NEXT: DenyGeometryShaderRootAccess: true diff --git a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml index 888a32b351690..745473117c937 100644 --- a/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml +++ b/llvm/test/ObjectYAML/DXContainer/RootSignature-StaticSamplers.yaml @@ -20,19 +20,19 @@ Parts: StaticSamplersOffset: 24 Parameters: [] Samplers: - - Filter: 16 - AddressU: 1 - AddressV: 2 - AddressW: 5 + - Filter: MinLinearMagMipPoint + AddressU: Wrap + AddressV: Mirror + AddressW: MirrorOnce MipLODBias: 1.23 MaxAnisotropy: 20 - ComparisonFunc: 4 - BorderColor: 0 + ComparisonFunc: LessEqual + BorderColor: TransparentBlack MinLOD: 4.56 MaxLOD: 8.90 ShaderRegister: 31 RegisterSpace: 32 - ShaderVisibility: 7 + ShaderVisibility: Mesh AllowInputAssemblerInputLayout: true DenyGeometryShaderRootAccess: true @@ -46,18 +46,18 @@ Parts: #CHECK-NEXT: StaticSamplersOffset: 24 #CHECK-NEXT: Parameters: [] #CHECK-NEXT: Samplers: -#CHECK-NEXT: - Filter: 16 -#CHECK-NEXT: AddressU: 1 -#CHECK-NEXT: AddressV: 2 -#CHECK-NEXT: AddressW: 5 +#CHECK-NEXT: - Filter: MinLinearMagMipPoint +#CHECK-NEXT: AddressU: Wrap +#CHECK-NEXT: AddressV: Mirror +#CHECK-NEXT: AddressW: MirrorOnce #CHECK-NEXT: MipLODBias: 1.23 #CHECK-NEXT: MaxAnisotropy: 20 -#CHECK-NEXT: ComparisonFunc: 4 -#CHECK-NEXT: BorderColor: 0 +#CHECK-NEXT: ComparisonFunc: LessEqual +#CHECK-NEXT: BorderColor: TransparentBlack #CHECK-NEXT: MinLOD: 4.56 #CHECK-NEXT: MaxLOD: 8.9 #CHECK-NEXT: ShaderRegister: 31 
#CHECK-NEXT: RegisterSpace: 32 -#CHECK-NEXT: ShaderVisibility: 7 +#CHECK-NEXT: ShaderVisibility: Mesh #CHECK-NEXT: AllowInputAssemblerInputLayout: true #CHECK-NEXT: DenyGeometryShaderRootAccess: true diff --git a/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp b/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp index a264ca7c3c3f6..b0ad208625436 100644 --- a/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp +++ b/llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp @@ -172,8 +172,8 @@ TEST(RootSignature, HeaderData) { NumStaticSamplers: 0 StaticSamplersOffset: 48 Parameters: - - ParameterType: 1 - ShaderVisibility: 2 + - ParameterType: Constants32Bit + ShaderVisibility: Hull Constants: Num32BitValues: 16 ShaderRegister: 15 @@ -224,8 +224,8 @@ TEST(RootSignature, ParseRootConstants) { NumStaticSamplers: 0 StaticSamplersOffset: 48 Parameters: - - ParameterType: 1 - ShaderVisibility: 2 + - ParameterType: Constants32Bit + ShaderVisibility: Hull Constants: Num32BitValues: 16 ShaderRegister: 15 @@ -276,8 +276,8 @@ TEST(RootSignature, ParseRootDescriptorsV10) { NumStaticSamplers: 0 StaticSamplersOffset: 44 Parameters: - - ParameterType: 2 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: CBV + ShaderVisibility: Domain Descriptor: ShaderRegister: 31 RegisterSpace: 32 @@ -327,8 +327,8 @@ TEST(RootSignature, ParseRootDescriptorsV11) { NumStaticSamplers: 0 StaticSamplersOffset: 48 Parameters: - - ParameterType: 2 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: CBV + ShaderVisibility: Domain Descriptor: ShaderRegister: 31 RegisterSpace: 32 @@ -379,12 +379,12 @@ TEST(RootSignature, ParseDescriptorTableV10) { NumStaticSamplers: 0 StaticSamplersOffset: 64 Parameters: - - ParameterType: 0 # SRV - ShaderVisibility: 3 # Domain + - ParameterType: DescriptorTable + ShaderVisibility: Domain Table: NumRanges: 1 Ranges: - - RangeType: 0 + - RangeType: SRV NumDescriptors: 41 BaseShaderRegister: 42 RegisterSpace: 43 @@ -435,12 +435,12 @@ TEST(RootSignature, 
ParseDescriptorTableV11) { NumStaticSamplers: 0 StaticSamplersOffset: 68 Parameters: - - ParameterType: 0 # Descriptor Table - ShaderVisibility: 3 # Domain + - ParameterType: DescriptorTable + ShaderVisibility: Domain Table: NumRanges: 1 Ranges: - - RangeType: 0 + - RangeType: SRV NumDescriptors: -1 BaseShaderRegister: 42 RegisterSpace: 43 @@ -492,19 +492,19 @@ TEST(RootSignature, ParseStaticSamplers) { StaticSamplersOffset: 24 Parameters: [] Samplers: - - Filter: 16 - AddressU: 1 - AddressV: 2 - AddressW: 5 + - Filter: MinLinearMagMipPoint + AddressU: Wrap + AddressV: Mirror + AddressW: MirrorOnce MipLODBias: 1.23 MaxAnisotropy: 20 - ComparisonFunc: 4 - BorderColor: 0 + ComparisonFunc: LessEqual + BorderColor: TransparentBlack MinLOD: 4.56 MaxLOD: 8.90 ShaderRegister: 31 RegisterSpace: 32 - ShaderVisibility: 7 + ShaderVisibility: Mesh AllowInputAssemblerInputLayout: true DenyGeometryShaderRootAccess: true )")); From 1a6b2b64b6fbbb33ce65ae27a3a9ded4545b48aa Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Fri, 12 Sep 2025 14:34:44 -0400 Subject: [PATCH 168/734] [MLIR] enable Standalone example test for Windows (#158183) This PR turns on all Standalone tests for Windows except for the plugins (which aren't enabled by default). 
--- mlir/examples/standalone/CMakeLists.txt | 4 +++- mlir/examples/standalone/python/CMakeLists.txt | 4 ++++ mlir/examples/standalone/test/CMakeLists.txt | 4 +++- .../standalone/test/Standalone/standalone-pass-plugin.mlir | 1 + .../standalone/test/Standalone/standalone-plugin.mlir | 1 + mlir/test/Examples/standalone/lit.local.cfg | 1 + mlir/test/Examples/standalone/test.toy | 3 ++- mlir/test/lit.site.cfg.py.in | 1 + 8 files changed, 16 insertions(+), 3 deletions(-) diff --git a/mlir/examples/standalone/CMakeLists.txt b/mlir/examples/standalone/CMakeLists.txt index 88dfa3e5d57a3..03627c0c10496 100644 --- a/mlir/examples/standalone/CMakeLists.txt +++ b/mlir/examples/standalone/CMakeLists.txt @@ -60,5 +60,7 @@ if(MLIR_ENABLE_BINDINGS_PYTHON) endif() add_subdirectory(test) add_subdirectory(standalone-opt) -add_subdirectory(standalone-plugin) +if(NOT WIN32) + add_subdirectory(standalone-plugin) +endif() add_subdirectory(standalone-translate) diff --git a/mlir/examples/standalone/python/CMakeLists.txt b/mlir/examples/standalone/python/CMakeLists.txt index a0eca9c095775..1ab27ce3b533a 100644 --- a/mlir/examples/standalone/python/CMakeLists.txt +++ b/mlir/examples/standalone/python/CMakeLists.txt @@ -26,6 +26,8 @@ declare_mlir_python_extension(StandalonePythonSources.Pybind11Extension ADD_TO_PARENT StandalonePythonSources SOURCES StandaloneExtensionPybind11.cpp + PRIVATE_LINK_LIBS + LLVMSupport EMBED_CAPI_LINK_LIBS StandaloneCAPI PYTHON_BINDINGS_LIBRARY pybind11 @@ -36,6 +38,8 @@ declare_mlir_python_extension(StandalonePythonSources.NanobindExtension ADD_TO_PARENT StandalonePythonSources SOURCES StandaloneExtensionNanobind.cpp + PRIVATE_LINK_LIBS + LLVMSupport EMBED_CAPI_LINK_LIBS StandaloneCAPI PYTHON_BINDINGS_LIBRARY nanobind diff --git a/mlir/examples/standalone/test/CMakeLists.txt b/mlir/examples/standalone/test/CMakeLists.txt index fdde159064287..8864563df8a33 100644 --- a/mlir/examples/standalone/test/CMakeLists.txt +++ b/mlir/examples/standalone/test/CMakeLists.txt 
@@ -14,8 +14,10 @@ set(STANDALONE_TEST_DEPENDS standalone-capi-test standalone-opt standalone-translate - StandalonePlugin ) +if(NOT WIN32) + list(APPEND STANDALONE_TEST_DEPENDS StandalonePlugin) +endif() if(MLIR_ENABLE_BINDINGS_PYTHON) list(APPEND STANDALONE_TEST_DEPENDS StandalonePythonModules) endif() diff --git a/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir b/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir index 1d652dc45830c..3020097dc1640 100644 --- a/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir +++ b/mlir/examples/standalone/test/Standalone/standalone-pass-plugin.mlir @@ -1,3 +1,4 @@ +// UNSUPPORTED: system-windows // RUN: mlir-opt %s --load-pass-plugin=%standalone_libs/StandalonePlugin%shlibext --pass-pipeline="builtin.module(standalone-switch-bar-foo)" | FileCheck %s module { diff --git a/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir b/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir index 468932b81a529..900b524c1feb7 100644 --- a/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir +++ b/mlir/examples/standalone/test/Standalone/standalone-plugin.mlir @@ -1,3 +1,4 @@ +// UNSUPPORTED: system-windows // RUN: mlir-opt %s --load-dialect-plugin=%standalone_libs/StandalonePlugin%shlibext --pass-pipeline="builtin.module(standalone-switch-bar-foo)" | FileCheck %s module { diff --git a/mlir/test/Examples/standalone/lit.local.cfg b/mlir/test/Examples/standalone/lit.local.cfg index fe8397c6b9a10..3b12dcbd99e83 100644 --- a/mlir/test/Examples/standalone/lit.local.cfg +++ b/mlir/test/Examples/standalone/lit.local.cfg @@ -10,3 +10,4 @@ config.substitutions.append(("%host_cc", config.host_cc)) config.substitutions.append(("%enable_libcxx", config.enable_libcxx)) config.substitutions.append(("%mlir_cmake_dir", config.mlir_cmake_dir)) config.substitutions.append(("%llvm_use_linker", config.llvm_use_linker)) +config.substitutions.append(("%cmake_build_type", 
config.cmake_build_type)) diff --git a/mlir/test/Examples/standalone/test.toy b/mlir/test/Examples/standalone/test.toy index e99bab5f0affc..8b6d9dd62b76d 100644 --- a/mlir/test/Examples/standalone/test.toy +++ b/mlir/test/Examples/standalone/test.toy @@ -1,4 +1,5 @@ # RUN: "%cmake_exe" "%mlir_src_root/examples/standalone" -G "%cmake_generator" \ +# RUN: -DCMAKE_BUILD_TYPE=%cmake_build_type \ # RUN: -DCMAKE_CXX_COMPILER=%host_cxx -DCMAKE_C_COMPILER=%host_cc \ # RUN: -DLLVM_ENABLE_LIBCXX=%enable_libcxx -DMLIR_DIR=%mlir_cmake_dir \ # RUN: -DLLVM_USE_LINKER=%llvm_use_linker \ @@ -11,4 +12,4 @@ # if any fail. # CHECK: Passed # CHECK-NOT: Failed -# UNSUPPORTED: target={{.*(windows|android).*}} +# UNSUPPORTED: target={{.*(android).*}} diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in index 8a742a227847b..2fc595dfabbf5 100644 --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -18,6 +18,7 @@ config.host_cxx = "@HOST_CXX@" config.enable_libcxx = "@LLVM_ENABLE_LIBCXX@" config.host_cmake = "@CMAKE_COMMAND@" config.host_cmake_generator = "@CMAKE_GENERATOR@" +config.cmake_build_type = "@CMAKE_BUILD_TYPE@" config.llvm_use_linker = "@LLVM_USE_LINKER@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.host_arch = "@HOST_ARCH@" From 6af94c566e3826de6b4a09518b78a48a1ffd92d3 Mon Sep 17 00:00:00 2001 From: Elvin Wang Date: Fri, 12 Sep 2025 11:42:08 -0700 Subject: [PATCH 169/734] [IntrinsicEmitter] Make AttributesMap bits adaptive (#157965) Make IntrinsicsToAttributesMap's func. and arg. fields be able to have adaptive sizes based on input other than hardcoded 8bits/8bits. This will ease the pressure for adding new intrinsics in private downstreams. func. 
attr bitsize will become 7(127/128) vs 8(255/256) --- llvm/lib/IR/Intrinsics.cpp | 8 ---- llvm/test/TableGen/intrinsic-attrs.td | 4 +- .../utils/TableGen/Basic/IntrinsicEmitter.cpp | 41 ++++++++++++------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp index 58a1f745a7122..4d2e8fadff4f7 100644 --- a/llvm/lib/IR/Intrinsics.cpp +++ b/llvm/lib/IR/Intrinsics.cpp @@ -740,14 +740,6 @@ Intrinsic::ID Intrinsic::lookupIntrinsicID(StringRef Name) { #include "llvm/IR/IntrinsicImpl.inc" #undef GET_INTRINSIC_ATTRIBUTES -AttributeSet Intrinsic::getFnAttributes(LLVMContext &C, ID id) { - if (id == 0) - return AttributeSet(); - uint16_t PackedID = IntrinsicsToAttributesMap[id - 1]; - uint8_t FnAttrID = PackedID >> 8; - return getIntrinsicFnAttributeSet(C, FnAttrID); -} - Function *Intrinsic::getOrInsertDeclaration(Module *M, ID id, ArrayRef Tys) { // There can never be multiple globals with the same name of different types, diff --git a/llvm/test/TableGen/intrinsic-attrs.td b/llvm/test/TableGen/intrinsic-attrs.td index bcded0cd2e9f1..ab808445f40a2 100644 --- a/llvm/test/TableGen/intrinsic-attrs.td +++ b/llvm/test/TableGen/intrinsic-attrs.td @@ -25,8 +25,8 @@ def int_deref_ptr_ret : Intrinsic<[llvm_ptr_ty], [], [Dereferenceable 256) - PrintFatalError("Too many unique argument attributes for table!"); - // Note, ID 255 is used to indicate no function attributes. - if (UniqFnAttributes.size() > 255) - PrintFatalError("Too many unique function attributes for table!"); - - // Assign a 16-bit packed ID for each intrinsic. The lower 8-bits will be its - // "argument attribute ID" (index in UniqAttributes) and upper 8 bits will be + const uint8_t UniqAttributesBitSize = Log2_32_Ceil(UniqAttributes.size() + 1); + // Note, ID `-1` is used to indicate no function attributes. 
+ const uint8_t UniqFnAttributesBitSize = + Log2_32_Ceil(UniqFnAttributes.size() + 2); + const uint16_t NoFunctionAttrsID = + maskTrailingOnes(UniqFnAttributesBitSize); + if (UniqAttributesBitSize + UniqFnAttributesBitSize > 16) + PrintFatalError( + "More than 16 bits are used for IntrinsicsToAttributesMap's entry!"); + + // Assign a 16-bit packed ID for each intrinsic. The lower bits will be its + // "argument attribute ID" (index in UniqAttributes) and upper bits will be // its "function attribute ID" (index in UniqFnAttributes). for (const CodeGenIntrinsic &Int : Ints) { uint16_t FnAttrIndex = hasFnAttributes(Int) ? UniqFnAttributes[&Int] : NoFunctionAttrsID; - OS << formatv("\n {} << 8 | {}, // {}", FnAttrIndex, - UniqAttributes[&Int], Int.Name); + OS << formatv("\n {} << {} | {}, // {}", FnAttrIndex, + UniqAttributesBitSize, UniqAttributes[&Int], Int.Name); } OS << R"( @@ -749,8 +752,8 @@ AttributeList Intrinsic::getAttributes(LLVMContext &C, ID id, return AttributeList(); uint16_t PackedID = IntrinsicsToAttributesMap[id - 1]; - uint8_t FnAttrID = PackedID >> 8; - uint8_t ArgAttrID = PackedID & 0xFF; + uint16_t FnAttrID = PackedID >> ({}); + uint16_t ArgAttrID = PackedID & ({}); using PairTy = std::pair; alignas(PairTy) char ASStorage[sizeof(PairTy) * {}]; PairTy *AS = reinterpret_cast(ASStorage); @@ -772,10 +775,20 @@ AttributeList Intrinsic::getAttributes(LLVMContext &C, ID id, } return AttributeList::get(C, ArrayRef(AS, NumAttrs)); } + +AttributeSet Intrinsic::getFnAttributes(LLVMContext &C, ID id) { + if (id == 0) + return AttributeSet(); + uint16_t PackedID = IntrinsicsToAttributesMap[id - 1]; + uint16_t FnAttrID = PackedID >> ({}); + return getIntrinsicFnAttributeSet(C, FnAttrID); +} #endif // GET_INTRINSIC_ATTRIBUTES )", - MaxNumAttrs, NoFunctionAttrsID); + UniqAttributesBitSize, + maskTrailingOnes(UniqAttributesBitSize), MaxNumAttrs, + NoFunctionAttrsID, UniqAttributesBitSize); } void IntrinsicEmitter::EmitIntrinsicToBuiltinMap( From 
4826039058aba304a874b07b67ecf59affa54a96 Mon Sep 17 00:00:00 2001 From: Andrew Gontarek Date: Fri, 12 Sep 2025 11:46:51 -0700 Subject: [PATCH 170/734] [LLDB][NVIDIA] Add NVPTX architecture support (#158334) - Introduced a new method `IsNVPTX()` in `ArchSpec` to check for NVPTX architecture. - Implemented the corresponding method in `ArchSpec.cpp` to utilize the existing triple architecture checks. --- lldb/include/lldb/Utility/ArchSpec.h | 5 +++++ lldb/source/Utility/ArchSpec.cpp | 2 ++ 2 files changed, 7 insertions(+) diff --git a/lldb/include/lldb/Utility/ArchSpec.h b/lldb/include/lldb/Utility/ArchSpec.h index 96bd5e3597b68..361108fd8f0e7 100644 --- a/lldb/include/lldb/Utility/ArchSpec.h +++ b/lldb/include/lldb/Utility/ArchSpec.h @@ -327,6 +327,11 @@ class ArchSpec { /// \return a boolean value. bool IsMIPS() const; + /// If NVPTX architecture return true. + /// + /// \return a boolean value. + bool IsNVPTX() const; + /// Returns a string representing current architecture as a target CPU for /// tools like compiler, disassembler etc. /// diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp index 1b8dae39735df..2a87cc6bf7de9 100644 --- a/lldb/source/Utility/ArchSpec.cpp +++ b/lldb/source/Utility/ArchSpec.cpp @@ -545,6 +545,8 @@ const char *ArchSpec::GetArchitectureName() const { bool ArchSpec::IsMIPS() const { return GetTriple().isMIPS(); } +bool ArchSpec::IsNVPTX() const { return GetTriple().isNVPTX(); } + std::string ArchSpec::GetTargetABI() const { std::string abi; From ef7de8d1447c822dec72d685d85053216936b895 Mon Sep 17 00:00:00 2001 From: choikwa <5455710+choikwa@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:51:36 -0400 Subject: [PATCH 171/734] [AMDGPU] Remove scope check in SIInsertWaitcnts::generateWaitcntInstBefore (#157821) This change was motivated by CK where many VMCNT(0)'s were generated due to instructions lacking !alias.scope metadata. 
The two causes of this were: 1) LowerLDSModule not tacking on scope metadata on a single LDS variable 2) IPSCCP pass before inliner replacing noalias ptr derivative with a global value, which made inliner unable to track it back to the noalias ptr argument. However, it turns out that IPSCCP losing the scope information was largely ineffectual as ScopedNoAliasAA was able to handle asymmetric condition, where one MemLoc was missing scope, and still return NoAlias result. AMDGPU however was checking for existence of scope in SIInsertWaitcnts and conservatively treating it as aliasing all and inserted VMCNT(0) before DS_READs, forcing it to wait for all previous LDS DMA instructions. Since we know that ScopedNoAliasAA can handle asymmetry, we should also allow AA query to determine if two MIs may alias. Passed PSDB. Previous attempt to address the issue in IPSCCP, likely stalled: https://github.com/llvm/llvm-project/pull/154522 This solution may be preferrable over that as issue only affects AMDGPU. --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 8 +------- llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index b163a274396ff..ae75fb529dade 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1941,13 +1941,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // LOAD_CNT is only relevant to vgpr or LDS. unsigned RegNo = FIRST_LDS_VGPR; - // Only objects with alias scope info were added to LDSDMAScopes array. - // In the absense of the scope info we will not be able to disambiguate - // aliasing here. There is no need to try searching for a corresponding - // store slot. This is conservatively correct because in that case we - // will produce a wait using the first (general) LDS DMA wait slot which - // will wait on all of them anyway. 
- if (Ptr && Memop->getAAInfo() && Memop->getAAInfo().Scope) { + if (Ptr && Memop->getAAInfo()) { const auto &LDSDMAStores = ScoreBrackets.getLDSDMAStores(); for (unsigned I = 0, E = LDSDMAStores.size(); I != E; ++I) { if (MI.mayAlias(AA, *LDSDMAStores[I], true)) diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll index 0bd8667d17e52..a00aca34252b1 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll @@ -26,7 +26,6 @@ define amdgpu_kernel void @test_waitcnt(ptr addrspace(1) %global_buffer, ptr add ; CHECK-NEXT: ds_write_b32 v1, v3 ; CHECK-NEXT: ds_write_b32 v2, v3 ; CHECK-NEXT: ; sched_barrier mask(0x00000000) -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ds_read_b32 v1, v1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: global_store_dword v0, v1, s[0:1] offset:16 From 1756b6e59cb1bbf78a9122c008ab0c6d413e1497 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Fri, 12 Sep 2025 12:46:24 -0700 Subject: [PATCH 172/734] [bazel] Fix buildifier in tblgen.bzl (#158351) --- utils/bazel/llvm-project-overlay/mlir/tblgen.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl b/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl index 2213d220da269..c94935216e0e9 100644 --- a/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl +++ b/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl @@ -506,6 +506,7 @@ def gentbl_sharded_ops( includes: See gentbl_rule.includes deps: See gentbl_rule.deps strip_include_prefix: Attribute to pass through to cc_library. + **kwargs: Passed through to all generated rules. 
""" cc_lib_name = name + "__gentbl_cc_lib" gentbl_cc_library( From aabf18d7184298566993e3141606cd79ff617d2d Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 12 Sep 2025 13:04:53 -0700 Subject: [PATCH 173/734] Revert "[DebugLine] Correct debug line emittion" (#158343) Reverts llvm/llvm-project#157529 Sorry, I missed that the missed that the LLVM test was using clang - layering dictates thats not OK. Please readjust the test case to work like the existing test coverage (or perhaps the existing test coverage is sufficient?) and post a new PR. --- llvm/lib/MC/MCDwarf.cpp | 30 ++---- llvm/test/DebugInfo/ARM/stmt_seq_macho.test | 98 ------------------- .../X86/DW_AT_LLVM_stmt_seq_sec_offset.ll | 29 +++--- llvm/test/MC/ELF/debug-loc-label.s | 54 +++++----- 4 files changed, 49 insertions(+), 162 deletions(-) delete mode 100644 llvm/test/DebugInfo/ARM/stmt_seq_macho.test diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index e8f000a584839..e7c0d37e8f99b 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -181,7 +181,7 @@ void MCDwarfLineTable::emitOne( unsigned FileNum, LastLine, Column, Flags, Isa, Discriminator; bool IsAtStartSeq; - MCSymbol *PrevLabel; + MCSymbol *LastLabel; auto init = [&]() { FileNum = 1; LastLine = 1; @@ -189,31 +189,21 @@ void MCDwarfLineTable::emitOne( Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0; Isa = 0; Discriminator = 0; - PrevLabel = nullptr; + LastLabel = nullptr; IsAtStartSeq = true; }; init(); // Loop through each MCDwarfLineEntry and encode the dwarf line number table. 
bool EndEntryEmitted = false; - for (auto It = LineEntries.begin(); It != LineEntries.end(); ++It) { - auto LineEntry = *It; - MCSymbol *CurrLabel = LineEntry.getLabel(); + for (const MCDwarfLineEntry &LineEntry : LineEntries) { + MCSymbol *Label = LineEntry.getLabel(); const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo(); if (LineEntry.LineStreamLabel) { if (!IsAtStartSeq) { - auto *Label = CurrLabel; - auto NextIt = It + 1; - // LineEntry with a null Label is probably a fake LineEntry we added - // when `-emit-func-debug-line-table-offsets` in order to terminate the - // sequence. Look for the next Label if possible, otherwise we will set - // the PC to the end of the section. - if (!Label && NextIt != LineEntries.end()) { - Label = NextIt->getLabel(); - } - MCOS->emitDwarfLineEndEntry(Section, PrevLabel, - /*EndLabel =*/Label); + MCOS->emitDwarfLineEndEntry(Section, LastLabel, + /*EndLabel =*/LastLabel); init(); } MCOS->emitLabel(LineEntry.LineStreamLabel, LineEntry.StreamLabelDefLoc); @@ -221,7 +211,7 @@ void MCDwarfLineTable::emitOne( } if (LineEntry.IsEndEntry) { - MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, PrevLabel, CurrLabel, + MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, Label, asmInfo->getCodePointerSize()); init(); EndEntryEmitted = true; @@ -268,12 +258,12 @@ void MCDwarfLineTable::emitOne( // At this point we want to emit/create the sequence to encode the delta in // line numbers and the increment of the address from the previous Label // and the current Label. - MCOS->emitDwarfAdvanceLineAddr(LineDelta, PrevLabel, CurrLabel, + MCOS->emitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label, asmInfo->getCodePointerSize()); Discriminator = 0; LastLine = LineEntry.getLine(); - PrevLabel = CurrLabel; + LastLabel = Label; IsAtStartSeq = false; } @@ -283,7 +273,7 @@ void MCDwarfLineTable::emitOne( // does not track ranges nor terminate the line table. In that case, // conservatively use the section end symbol to end the line table. 
if (!EndEntryEmitted && !IsAtStartSeq) - MCOS->emitDwarfLineEndEntry(Section, PrevLabel); + MCOS->emitDwarfLineEndEntry(Section, LastLabel); } void MCDwarfLineTable::endCurrentSeqAndEmitLineStreamLabel(MCStreamer *MCOS, diff --git a/llvm/test/DebugInfo/ARM/stmt_seq_macho.test b/llvm/test/DebugInfo/ARM/stmt_seq_macho.test deleted file mode 100644 index f0874bfc45ed2..0000000000000 --- a/llvm/test/DebugInfo/ARM/stmt_seq_macho.test +++ /dev/null @@ -1,98 +0,0 @@ -// RUN: split-file %s %t - -// RUN: clang++ --target=arm64-apple-macos11 \ -// RUN: %t/stmt_seq_macho.cpp -o %t/stmt_seq_macho.o \ -// RUN: -g -Oz -gdwarf-4 -c -mno-outline \ -// RUN: -mllvm -emit-func-debug-line-table-offsets \ -// RUN: -fdebug-compilation-dir=/private/tmp/stmt_seq \ -// RUN: -fno-unwind-tables -fno-exceptions - -// RUN: llvm-dwarfdump -all %t/stmt_seq_macho.o | FileCheck %s - -// CHECK: Address Line Column File ISA Discriminator OpIndex Flags -// CHECK-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- -// CHECK-NEXT: 0x0000000000000000 2 33 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000004 2 33 1 0 0 0 is_stmt end_sequence -// CHECK-NEXT: 0x0000000000000004 3 33 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000008 3 33 1 0 0 0 is_stmt end_sequence -// CHECK-NEXT: 0x0000000000000008 4 33 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x000000000000000c 4 33 1 0 0 0 is_stmt end_sequence -// CHECK-NEXT: 0x000000000000000c 7 10 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000010 7 3 1 0 0 0 -// CHECK-NEXT: 0x0000000000000014 7 3 1 0 0 0 end_sequence -// CHECK-NEXT: 0x0000000000000014 12 14 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000018 12 5 1 0 0 0 -// CHECK-NEXT: 0x000000000000001c 12 5 1 0 0 0 end_sequence -// CHECK-NEXT: 0x000000000000001c 16 14 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000020 16 5 1 0 0 0 -// CHECK-NEXT: 0x0000000000000024 16 5 1 0 0 0 end_sequence -// CHECK-NEXT: 
0x0000000000000024 21 14 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000028 21 5 1 0 0 0 -// CHECK-NEXT: 0x000000000000002c 21 5 1 0 0 0 end_sequence -// CHECK-NEXT: 0x000000000000002c 25 20 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000030 26 5 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000034 26 5 1 0 0 0 is_stmt end_sequence -// CHECK-NEXT: 0x0000000000000034 37 0 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000044 39 12 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x0000000000000050 40 12 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000058 40 9 1 0 0 0 is_stmt -// CHECK-NEXT: 0x000000000000005c 41 12 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000068 42 12 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000070 41 9 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000074 46 18 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000084 42 9 1 0 0 0 is_stmt -// CHECK-NEXT: 0x0000000000000088 47 5 1 0 0 0 is_stmt epilogue_begin -// CHECK-NEXT: 0x0000000000000098 47 5 1 0 0 0 is_stmt end_sequence -// CHECK-NEXT: 0x0000000000000098 34 85 1 0 0 0 is_stmt prologue_end -// CHECK-NEXT: 0x000000000000009c 34 85 1 0 0 0 is_stmt end_sequence -// CHECK-NEXT: 0x000000000000009c 34 86 1 0 0 0 is_stmt prologue_end - -#--- stmt_seq_macho.cpp -#define ATTRIB extern "C" __attribute__((noinline)) -ATTRIB void function_empty_1() {} -ATTRIB void function_empty_2() {} -ATTRIB void function_empty_3() {} - -ATTRIB int function1_copy1(int a) { - return ++a; -} - -ATTRIB int function3_copy1(int a) { - int b = a + 3; - return b + 1; -} - -ATTRIB int function2_copy1(int a) { - return a - 22; -} - -ATTRIB int function3_copy2(int a) { - int b = a + 3; - return b + 1; -} - -ATTRIB int function2_copy2(int a) { - int result = a - 22; - return result; -} - -struct logic_error { - logic_error(const char* s) {} -}; - -struct length_error : public logic_error { - __attribute__((noinline)) explicit length_error(const char* s) : logic_error(s) {} -}; - -int main() { - int sum = 0; - sum += 
function2_copy2(3); - sum += function3_copy2(41); - sum += function2_copy1(11); - sum += function1_copy1(42); - function_empty_1(); - function_empty_2(); - function_empty_3(); - length_error e("test"); - return sum; -} diff --git a/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll b/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll index f17c6e5429b6b..58f6495924b90 100644 --- a/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll +++ b/llvm/test/DebugInfo/X86/DW_AT_LLVM_stmt_seq_sec_offset.ll @@ -14,7 +14,7 @@ ; STMT_SEQ: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000043) ; STMT_SEQ: DW_AT_name {{.*}}func01 ; STMT_SEQ: DW_TAG_subprogram [[[ABBREV_CODE2]]] -; STMT_SEQ: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000058) +; STMT_SEQ: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000056) ; STMT_SEQ: DW_AT_name {{.*}}main ;; Check the entire line sequence to see that it's correct @@ -29,23 +29,22 @@ ; STMT_SEQ-NEXT: 0x00000050: 05 DW_LNS_set_column (3) ; STMT_SEQ-NEXT: 0x00000052: 67 address += 6, line += 1, op-index += 0 ; STMT_SEQ-NEXT: 0x0000000000000006 6 3 0 0 0 0 is_stmt -; STMT_SEQ-NEXT: 0x00000053: 02 DW_LNS_advance_pc (addr += 2, op-index += 0) -; STMT_SEQ-NEXT: 0x00000055: 00 DW_LNE_end_sequence -; STMT_SEQ-NEXT: 0x0000000000000008 6 3 0 0 0 0 is_stmt end_sequence -; STMT_SEQ-NEXT: 0x00000058: 04 DW_LNS_set_file (0) -; STMT_SEQ-NEXT: 0x0000005a: 00 DW_LNE_set_address (0x00000008) -; STMT_SEQ-NEXT: 0x00000061: 03 DW_LNS_advance_line (10) -; STMT_SEQ-NEXT: 0x00000063: 01 DW_LNS_copy +; STMT_SEQ-NEXT: 0x00000053: 00 DW_LNE_end_sequence +; STMT_SEQ-NEXT: 0x0000000000000006 6 3 0 0 0 0 is_stmt end_sequence +; STMT_SEQ-NEXT: 0x00000056: 04 DW_LNS_set_file (0) +; STMT_SEQ-NEXT: 0x00000058: 00 DW_LNE_set_address (0x00000008) +; STMT_SEQ-NEXT: 0x0000005f: 03 DW_LNS_advance_line (10) +; STMT_SEQ-NEXT: 0x00000061: 01 DW_LNS_copy ; STMT_SEQ-NEXT: 0x0000000000000008 10 0 0 0 0 0 is_stmt -; STMT_SEQ-NEXT: 0x00000064: 05 
DW_LNS_set_column (10) -; STMT_SEQ-NEXT: 0x00000066: 0a DW_LNS_set_prologue_end -; STMT_SEQ-NEXT: 0x00000067: 83 address += 8, line += 1, op-index += 0 +; STMT_SEQ-NEXT: 0x00000062: 05 DW_LNS_set_column (10) +; STMT_SEQ-NEXT: 0x00000064: 0a DW_LNS_set_prologue_end +; STMT_SEQ-NEXT: 0x00000065: 83 address += 8, line += 1, op-index += 0 ; STMT_SEQ-NEXT: 0x0000000000000010 11 10 0 0 0 0 is_stmt prologue_end -; STMT_SEQ-NEXT: 0x00000068: 05 DW_LNS_set_column (3) -; STMT_SEQ-NEXT: 0x0000006a: 9f address += 10, line += 1, op-index += 0 +; STMT_SEQ-NEXT: 0x00000066: 05 DW_LNS_set_column (3) +; STMT_SEQ-NEXT: 0x00000068: 9f address += 10, line += 1, op-index += 0 ; STMT_SEQ-NEXT: 0x000000000000001a 12 3 0 0 0 0 is_stmt -; STMT_SEQ-NEXT: 0x0000006b: 02 DW_LNS_advance_pc (addr += 5, op-index += 0) -; STMT_SEQ-NEXT: 0x0000006d: 00 DW_LNE_end_sequence +; STMT_SEQ-NEXT: 0x00000069: 02 DW_LNS_advance_pc (addr += 5, op-index += 0) +; STMT_SEQ-NEXT: 0x0000006b: 00 DW_LNE_end_sequence ; STMT_SEQ-NEXT: 0x000000000000001f 12 3 0 0 0 0 is_stmt end_sequence ; generated from: diff --git a/llvm/test/MC/ELF/debug-loc-label.s b/llvm/test/MC/ELF/debug-loc-label.s index 4200b1192107b..6b5d04777bef4 100644 --- a/llvm/test/MC/ELF/debug-loc-label.s +++ b/llvm/test/MC/ELF/debug-loc-label.s @@ -17,47 +17,43 @@ # CHECK-LINE-TABLE-NEXT: 0x0000002a: 00 DW_LNE_set_address (0x0000000000000000) # CHECK-LINE-TABLE-NEXT: 0x00000035: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000000 1 1 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x00000036: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) -# CHECK-LINE-TABLE-NEXT: 0x00000038: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000008 1 1 1 0 0 0 is_stmt end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000003b: 05 DW_LNS_set_column (2) -# CHECK-LINE-TABLE-NEXT: 0x0000003d: 00 DW_LNE_set_address (0x0000000000000008) -# CHECK-LINE-TABLE-NEXT: 0x00000048: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x00000036: 00 DW_LNE_end_sequence +# 
CHECK-LINE-TABLE-NEXT: 0x0000000000000000 1 1 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x00000039: 05 DW_LNS_set_column (2) +# CHECK-LINE-TABLE-NEXT: 0x0000003b: 00 DW_LNE_set_address (0x0000000000000008) +# CHECK-LINE-TABLE-NEXT: 0x00000046: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000008 1 2 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x00000049: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) -# CHECK-LINE-TABLE-NEXT: 0x0000004b: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000010 1 2 1 0 0 0 is_stmt end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000004e: 05 DW_LNS_set_column (3) -# CHECK-LINE-TABLE-NEXT: 0x00000050: 00 DW_LNE_set_address (0x0000000000000010) -# CHECK-LINE-TABLE-NEXT: 0x0000005b: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x00000047: 00 DW_LNE_end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000000000000008 1 2 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000004a: 05 DW_LNS_set_column (3) +# CHECK-LINE-TABLE-NEXT: 0x0000004c: 00 DW_LNE_set_address (0x0000000000000010) +# CHECK-LINE-TABLE-NEXT: 0x00000057: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000010 1 3 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x0000005c: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) -# CHECK-LINE-TABLE-NEXT: 0x0000005e: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 3 1 0 0 0 is_stmt end_sequence -# CHECK-LINE-TABLE-NEXT: 0x00000061: 05 DW_LNS_set_column (4) -# CHECK-LINE-TABLE-NEXT: 0x00000063: 00 DW_LNE_set_address (0x0000000000000018) -# CHECK-LINE-TABLE-NEXT: 0x0000006e: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x00000058: 00 DW_LNE_end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000000000000010 1 3 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000005b: 05 DW_LNS_set_column (4) +# CHECK-LINE-TABLE-NEXT: 0x0000005d: 00 DW_LNE_set_address (0x0000000000000018) +# CHECK-LINE-TABLE-NEXT: 0x00000068: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 4 1 0 0 0 is_stmt -# 
CHECK-LINE-TABLE-NEXT: 0x0000006f: 05 DW_LNS_set_column (5) -# CHECK-LINE-TABLE-NEXT: 0x00000071: 01 DW_LNS_copy +# CHECK-LINE-TABLE-NEXT: 0x00000069: 05 DW_LNS_set_column (5) +# CHECK-LINE-TABLE-NEXT: 0x0000006b: 01 DW_LNS_copy # CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 5 1 0 0 0 is_stmt -# CHECK-LINE-TABLE-NEXT: 0x00000072: 02 DW_LNS_advance_pc (addr += 8, op-index += 0) -# CHECK-LINE-TABLE-NEXT: 0x00000074: 00 DW_LNE_end_sequence -# CHECK-LINE-TABLE-NEXT: 0x0000000000000020 1 5 1 0 0 0 is_stmt end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000006c: 00 DW_LNE_end_sequence +# CHECK-LINE-TABLE-NEXT: 0x0000000000000018 1 5 1 0 0 0 is_stmt end_sequence # CHECK-SYM: Symbol table '.symtab' contains 9 entries: # CHECK-SYM-NEXT: Num: Value Size Type Bind Vis Ndx Name # CHECK-SYM-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND # CHECK-SYM-NEXT: 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS test.c # CHECK-SYM-NEXT: 2: 0000000000000000 0 SECTION LOCAL DEFAULT 2 .text -# CHECK-SYM-NEXT: 3: 000000000000003b 0 NOTYPE LOCAL DEFAULT 3 my_label_02 -# CHECK-SYM-NEXT: 4: 000000000000004e 0 NOTYPE LOCAL DEFAULT 3 my_label_03 -# CHECK-SYM-NEXT: 5: 0000000000000061 0 NOTYPE LOCAL DEFAULT 3 my_label_04 -# CHECK-SYM-NEXT: 6: 000000000000004e 0 NOTYPE LOCAL DEFAULT 3 my_label_03.1 -# CHECK-SYM-NEXT: 7: 0000000000000077 0 NOTYPE LOCAL DEFAULT 3 my_label_05 +# CHECK-SYM-NEXT: 3: 0000000000000039 0 NOTYPE LOCAL DEFAULT 3 my_label_02 +# CHECK-SYM-NEXT: 4: 000000000000004a 0 NOTYPE LOCAL DEFAULT 3 my_label_03 +# CHECK-SYM-NEXT: 5: 000000000000005b 0 NOTYPE LOCAL DEFAULT 3 my_label_04 +# CHECK-SYM-NEXT: 6: 000000000000004a 0 NOTYPE LOCAL DEFAULT 3 my_label_03.1 +# CHECK-SYM-NEXT: 7: 000000000000006f 0 NOTYPE LOCAL DEFAULT 3 my_label_05 # CHECK-SYM-NEXT: 8: 0000000000000000 0 FUNC GLOBAL DEFAULT 2 foo -# CHECK-OFFSETS: 0000 3b000000 4e000000 61000000 +# CHECK-OFFSETS: 0000 39000000 4a000000 5b000000 .text .file "test.c" From 9566388cbdefba183dadfb5e116ff7adde08cea5 Mon Sep 17 00:00:00 2001 
From: Aiden Grossman Date: Fri, 12 Sep 2025 13:35:15 -0700 Subject: [PATCH 174/734] [Github] Delete dependabot config (#158337) Dependabot cannot configure the branch prefix, which means it fails everytime it tries to run because we only allow user/ branches. This is in preparation for using Renovate which supports custom branch prefixes and has other advantages, like the ability to run/get setup without any assisstance from a repository admin unlike dependabot. This makes it significantly more hackable for the rest of the community. --- .github/dependabot.yml | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 8c1dfd39b82c4..0000000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,18 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "monthly" - groups: - github-actions: - patterns: - - "*" - - package-ecosystem: "pip" - directory: "/llvm/docs" - schedule: - interval: "monthly" - groups: - llvm-docs-requirements: - patterns: - - "*" From 01d85e73d9711315b59ca3c80f894ab314d2c055 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 12 Sep 2025 13:41:08 -0700 Subject: [PATCH 175/734] [gn build] Port 220d705d2189d --- llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn | 1 + llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn index 93c0f3c51fe89..57e9300159971 100644 --- a/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn @@ -21,6 +21,7 @@ static_library("Format") { "MacroExpander.cpp", "MatchFilePath.cpp", "NamespaceEndCommentsFixer.cpp", + "NumericLiteralCaseFixer.cpp", "NumericLiteralInfo.cpp", "ObjCPropertyAttributeOrderFixer.cpp", 
"QualifierAlignmentFixer.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn index 88521a8e59da2..c501f121df4a8 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn @@ -38,6 +38,7 @@ unittest("FormatTests") { "MacroExpanderTest.cpp", "MatchFilePathTest.cpp", "NamespaceEndCommentsFixerTest.cpp", + "NumericLiteralCaseTest.cpp", "NumericLiteralInfoTest.cpp", "ObjCPropertyAttributeOrderFixerTest.cpp", "QualifierFixerTest.cpp", From d161d37dd3169a69cc07abd737d210f5073bac8f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 12 Sep 2025 13:41:09 -0700 Subject: [PATCH 176/734] [gn build] Port 8c0f3b6e8f8db --- llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn index 14e6671f7d9a8..b0c2ca333cfab 100644 --- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn @@ -16,6 +16,7 @@ unittest("CoreTests") { ] sources = [ "BinaryContext.cpp", + "ClusteredRows.cpp", "DynoStats.cpp", "MCPlusBuilder.cpp", "MemoryMaps.cpp", From e1efb51080801575712069a0b17a1656b66e7dfe Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 12 Sep 2025 13:41:10 -0700 Subject: [PATCH 177/734] [gn build] Port f3efbce4a73c5 --- llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn index 183fa57d47a63..73ed834599e02 100644 --- a/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/TargetParser/BUILD.gn @@ -17,6 +17,7 @@ static_library("TargetParser") { "RISCVISAInfo.cpp", "RISCVTargetParser.cpp", 
"SubtargetFeature.cpp", + "TargetDataLayout.cpp", "TargetParser.cpp", "Triple.cpp", "X86TargetParser.cpp", From b5516dad6e18db91858449bfa96a5e1271568037 Mon Sep 17 00:00:00 2001 From: jtstogel Date: Fri, 12 Sep 2025 13:49:39 -0700 Subject: [PATCH 178/734] [PGO][test] Ensure test input is writeable after copying. (#158356) This test errors when trying to append to the `%t` file when run in an environment where the source tree is mounted read-only, since `cp` preserves the read-only file permission. --- llvm/test/Verifier/llvm.loop.estimated_trip_count.ll | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll index 3c0bc8a39ebeb..b1e456f5b0ad6 100644 --- a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll +++ b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll @@ -26,36 +26,43 @@ exit: ; No value. ; RUN: cp %s %t +; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count"}' >> %t ; RUN: not %{RUN} TOO-FEW ; i16 value. ; RUN: cp %s %t +; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i16 5}' >> %t ; RUN: %{RUN} GOOD ; i32 value. ; RUN: cp %s %t +; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5}' >> %t ; RUN: %{RUN} GOOD ; i64 value. ; RUN: cp %s %t +; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i64 5}' >> %t ; RUN: not %{RUN} BAD-VALUE ; MDString value. ; RUN: cp %s %t +; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !"5"}' >> %t ; RUN: not %{RUN} BAD-VALUE ; MDNode value. ; RUN: cp %s %t +; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !2}' >> %t ; RUN: echo '!2 = !{i32 5}' >> %t ; RUN: not %{RUN} BAD-VALUE ; Too many values. 
; RUN: cp %s %t +; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5, i32 5}' >> %t ; RUN: not %{RUN} TOO-MANY From 86bcd1c2b256cd6aa5e65e1a54b63f929d616464 Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Fri, 12 Sep 2025 13:53:32 -0700 Subject: [PATCH 179/734] [mlir][Intrange] Fix materializing ShapedType constant values (#158359) When materializing integer ranges of splat tensors or vector as constants, they should use DenseElementsAttr of the shaped type, not IntegerAttrs of the element types, since this can violate the invariants of tensor/vector ops. Co-authored-by: Jeff Niu --- .../Analysis/DataFlow/IntegerRangeAnalysis.cpp | 15 ++++++++++++--- .../Arith/Transforms/IntRangeOptimizations.cpp | 2 ++ mlir/test/Dialect/Arith/int-range-opts.mlir | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp index e79f6a8aec1cf..70b56ca77b2da 100644 --- a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp @@ -26,6 +26,7 @@ #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferIntRangeInterface.h" #include "mlir/Interfaces/LoopLikeInterface.h" +#include "mlir/Support/DebugStringHelper.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" @@ -76,9 +77,17 @@ void IntegerValueRangeLattice::onUpdate(DataFlowSolver *solver) const { else dialect = value.getParentBlock()->getParentOp()->getDialect(); - Type type = getElementTypeOrSelf(value); - solver->propagateIfChanged( - cv, cv->join(ConstantValue(IntegerAttr::get(type, *constant), dialect))); + Attribute cstAttr; + if (isa(value.getType())) { + cstAttr = IntegerAttr::get(value.getType(), *constant); + } else if (auto shapedTy = dyn_cast(value.getType())) { + cstAttr = SplatElementsAttr::get(shapedTy, *constant); + } else { + 
llvm::report_fatal_error( + Twine("FIXME: Don't know how to create a constant for this type: ") + + mlir::debugString(value.getType())); + } + solver->propagateIfChanged(cv, cv->join(ConstantValue(cstAttr, dialect))); } LogicalResult IntegerRangeAnalysis::visitOperation( diff --git a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp index 777ff0ecaa314..2017905587b26 100644 --- a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp @@ -8,6 +8,7 @@ #include +#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" #include "mlir/Analysis/DataFlowFramework.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" @@ -485,6 +486,7 @@ struct IntRangeOptimizationsPass final MLIRContext *ctx = op->getContext(); DataFlowSolver solver; solver.load(); + solver.load(); solver.load(); if (failed(solver.initializeAndRun(op))) return signalPassFailure(); diff --git a/mlir/test/Dialect/Arith/int-range-opts.mlir b/mlir/test/Dialect/Arith/int-range-opts.mlir index ea5969a100258..e6e48d30cece5 100644 --- a/mlir/test/Dialect/Arith/int-range-opts.mlir +++ b/mlir/test/Dialect/Arith/int-range-opts.mlir @@ -132,3 +132,19 @@ func.func @wraps() -> i8 { %mod = arith.remsi %val, %c64 : i8 return %mod : i8 } + +// ----- + +// CHECK-LABEL: @analysis_crash +func.func @analysis_crash(%arg0: i32, %arg1: tensor<128xi1>) -> tensor<128xi64> { + %c0_i32 = arith.constant 0 : i32 + %cst = arith.constant dense<-1> : tensor<128xi32> + %splat = tensor.splat %arg0 : tensor<128xi32> + %0 = scf.for %arg2 = %c0_i32 to %arg0 step %arg0 iter_args(%arg3 = %splat) -> (tensor<128xi32>) : i32 { + scf.yield %arg3 : tensor<128xi32> + } + %1 = arith.select %arg1, %0#0, %cst : tensor<128xi1>, tensor<128xi32> + // Make sure the analysis doesn't crash when materializing the range as a tensor constant. 
+ %2 = arith.extsi %1 : tensor<128xi32> to tensor<128xi64> + return %2 : tensor<128xi64> +} From f645d209d42c0d4ccb31d48c6663676098d7ec4d Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Fri, 12 Sep 2025 14:07:44 -0700 Subject: [PATCH 180/734] [bazel] Add rules_shell for sh_binary rule (#158365) This is required for the upcoming bazel 9.x release where this rule is no longer automatically available. --- utils/bazel/WORKSPACE | 14 ++++++++++++++ utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 1 + 2 files changed, 15 insertions(+) diff --git a/utils/bazel/WORKSPACE b/utils/bazel/WORKSPACE index da69e1d7cf5a7..00cfea572096a 100644 --- a/utils/bazel/WORKSPACE +++ b/utils/bazel/WORKSPACE @@ -199,3 +199,17 @@ python_register_toolchains( name = "python_3_12", python_version = "3.12", ) + +maybe( + http_archive, + name = "rules_shell", + sha256 = "e6b87c89bd0b27039e3af2c5da01147452f240f75d505f5b6880874f31036307", + strip_prefix = "rules_shell-0.6.1", + url = "https://github.com/bazelbuild/rules_shell/releases/download/v0.6.1/rules_shell-v0.6.1.tar.gz", +) + +load("@rules_shell//shell:repositories.bzl", "rules_shell_dependencies", "rules_shell_toolchains") + +rules_shell_dependencies() + +rules_shell_toolchains() diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index e6f10b08932e5..8fe8258d72e34 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -6,6 +6,7 @@ load("@bazel_skylib//rules:common_settings.bzl", "string_flag") load("@bazel_skylib//rules:expand_template.bzl", "expand_template") load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") load("@rules_python//python:defs.bzl", "py_binary") +load("@rules_shell//shell:sh_binary.bzl", "sh_binary") load("//mlir:tblgen.bzl", "gentbl_cc_library", "gentbl_filegroup", "td_library") load(":binary_alias.bzl", "binary_alias") load(":config.bzl", "llvm_config_defines") From 
b87f1b22a8d8a77d5360f201af5ba08adbb0a974 Mon Sep 17 00:00:00 2001 From: Alan Li Date: Fri, 12 Sep 2025 14:23:00 -0700 Subject: [PATCH 181/734] [MLIR] Add `InParallelOpInterface` for parallel combining operations (#157736) This commit: - Introduces a new `InParallelOpInterface`, along with the `ParallelCombiningOpInterface`, represent the parallel updating operations we have in a parallel loop of `scf.forall`. - Change the name of `ParallelCombiningOpInterface` to `InParallelOpInterface` as the naming was quite confusing. - `ParallelCombiningOpInterface` now is used to generalize operations that insert into shared tensors within parallel combining regions. Previously, only `tensor.parallel_insert_slice` was supported directly in `scf.InParallelOp` regions. - `tensor.parallel_insert_slice` now implements `ParallelCombiningOpInterface`. This change enables future extensions to support additional parallel combining operations beyond `tensor.parallel_insert_slice`, which have different update semantics, so the `in_parallel` region can correctly and safely represent these kinds of operation without potential mistakes such as races. 
Author credits: @qedawkins --- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 4 +- .../mlir/Dialect/Tensor/IR/TensorOps.td | 15 +-- .../Interfaces/ParallelCombiningOpInterface.h | 2 +- .../ParallelCombiningOpInterface.td | 61 +++++++++- .../TransformOps/LinalgTransformOps.cpp | 12 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 54 +++++---- .../BufferDeallocationOpInterfaceImpl.cpp | 10 +- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 28 +++-- .../BufferizableOpInterfaceImpl.cpp | 4 +- .../Tensor/Transforms/FoldTensorSubsetOps.cpp | 11 +- .../ParallelCombiningOpInterface.cpp | 4 +- .../Dialect/Linalg/drop-unit-extent-dims.mlir | 3 +- mlir/test/Dialect/SCF/invalid.mlir | 2 +- .../SCF/one-shot-bufferize-analysis.mlir | 108 ++++++++++++++++++ ...-shot-bufferize-tensor-copy-insertion.mlir | 30 ++++- .../llvm-project-overlay/mlir/BUILD.bazel | 1 + 16 files changed, 280 insertions(+), 69 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index 88df54174da24..d3c01c31636a7 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -654,7 +654,7 @@ def ForallOp : SCF_Op<"forall", [ def InParallelOp : SCF_Op<"forall.in_parallel", [ Pure, Terminator, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, HasParent<"ForallOp">, ] # GraphRegionNoTerminator.traits> { let summary = "terminates a `forall` block"; @@ -679,8 +679,6 @@ def InParallelOp : SCF_Op<"forall.in_parallel", [ OpBuilder<(ins)>, ]; - // TODO: Add a `InParallelOpInterface` interface for ops that can - // appear inside in_parallel. 
let extraClassDeclaration = [{ ::llvm::SmallVector<::mlir::BlockArgument> getDests(); ::llvm::iterator_range<::mlir::Block::iterator> getYieldingOps(); diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 7d396e5c64c28..2453cf5b5b5a4 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -1470,24 +1470,25 @@ def Tensor_PadOp : Tensor_Op<"pad", [ // ParallelInsertSliceOp //===----------------------------------------------------------------------===// -// TODO: Implement InParallelOpInterface. def Tensor_ParallelInsertSliceOp : Tensor_Op<"parallel_insert_slice", [ AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface, + DeclareOpInterfaceMethods, // TODO: Cannot use an interface here atm, verify this manually for now. - // HasParent<"ParallelCombiningOpInterface"> + // HasParent<"InParallelOpInterface"> ]> { let summary = [{ Specify the tensor slice update of a single thread of a parent - ParallelCombiningOpInterface op. + InParallelOpInterface op. }]; let description = [{ The `parallel_insert_slice` yields a subset tensor value to its parent - ParallelCombiningOpInterface. These subset tensor values are aggregated to + InParallelOpInterface. These subset tensor values are aggregated to in some unspecified order into a full tensor value returned by the parent parallel iterating op. The `parallel_insert_slice` is one such op allowed in the - ParallelCombiningOpInterface op. + InParallelOpInterface op. 
Conflicting writes result in undefined semantics, in that the indices written to by multiple parallel updates might contain data from any of the updates, @@ -1569,8 +1570,8 @@ def Tensor_ParallelInsertSliceOp : Tensor_Op<"parallel_insert_slice", [ return ::llvm::cast(getDest().getType()); } - ParallelCombiningOpInterface getParallelCombiningParent() { - return dyn_cast( + InParallelOpInterface getParallelCombiningParent() { + return dyn_cast( getOperation()->getParentOp()); } diff --git a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h index 72db06163df37..82ab427699f64 100644 --- a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h +++ b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.h @@ -19,7 +19,7 @@ namespace mlir { namespace detail { // TODO: Single region single block interface on interfaces ? -LogicalResult verifyParallelCombiningOpInterface(Operation *op); +LogicalResult verifyInParallelOpInterface(Operation *op); } // namespace detail } // namespace mlir diff --git a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td index 424b4cf0a0a58..ace26f723ef53 100644 --- a/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td +++ b/mlir/include/mlir/Interfaces/ParallelCombiningOpInterface.td @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// Defines the interface for ops that perform parallel combining operations. +// Defines the interface for ops that perform in parallel combining +// operations. 
// //===----------------------------------------------------------------------===// @@ -15,9 +16,9 @@ include "mlir/IR/OpBase.td" -def ParallelCombiningOpInterface : OpInterface<"ParallelCombiningOpInterface"> { +def InParallelOpInterface : OpInterface<"InParallelOpInterface"> { let description = [{ - A parallel combining op is an op with a region. + An in parallel op is an op with a region. This is useful as a terminator to parallel operations that iterate over some set and return tensors while avoiding tight coupling between the @@ -52,8 +53,60 @@ def ParallelCombiningOpInterface : OpInterface<"ParallelCombiningOpInterface"> { ]; // TODO: Single region single block interface on interfaces ? let verify = [{ - return verifyParallelCombiningOpInterface($_op); + return verifyInParallelOpInterface($_op); + }]; +} + +def ParallelCombiningOpInterface : OpInterface<"ParallelCombiningOpInterface"> { + let description = [{ + A parallel combining op is an operation that models parallel contributions + to result tensors within the context of a parent iterating operation. + + This interface is designed for operations that need to coordinate parallel + insertions or contributions to tensors that are being constructed across + multiple parallel iterations. The destination refers to a tensor value that + is assembled by aggregating results from parallel computations; each + parallel iteration may contribute a slice, element, or region to the final + result. No in-place mutation of tensors is implied. + + One significant use case for this interface is `tensor.parallel_insert_slice` + which allows parallel insertion of slices that are aggregated into a + destination tensor. With this interface, other operations that express + similar parallel contributions can also be defined. + + This op works within an op implementing the `InParallelOpInterface` that + specifies how the parallel results are combined. 
+ + Key semantics: + - The operation identifies destination tensors to which iterations + contribute through the `getUpdatedDestinations` method + - Each parallel iteration may produce elements or regions that are + incorporated into the destination tensor + - The parent iterating operation manages the coordination and ensures + proper synchronization of these contributions + + Note: This interface does not verify itself, it is up to the implementing operation + to verify the correctness of the op. }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Returns the list of destination values this op contributes to. + }], + /*retTy=*/"::mlir::MutableOperandRange", + /*methodName=*/"getUpdatedDestinations", + /*args=*/(ins) + >, + InterfaceMethod< + /*desc=*/[{ + Returns the iterating parent for this op. + }], + /*retTy=*/"::mlir::Operation*", + /*methodName=*/"getIteratingParent", + /*args=*/(ins) + >, + ]; } #endif // MLIR_INTERFACES_PARALLELCOMBININGOPINTERFACE diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index f3db8f7ccfaa1..715eebb3c4a13 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -36,6 +36,7 @@ #include "mlir/IR/BuiltinTypeInterfaces.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/ParallelCombiningOpInterface.h" #include "mlir/Interfaces/TilingInterface.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -4147,12 +4148,11 @@ DiagnosedSilenceableFailure doit(RewriterBase &rewriter, OpTy target, return DiagnosedSilenceableFailure::success(); } - // If we are inside an InParallel region, temporarily set the insertion point - // outside: only tensor.parallel_insert_slice ops are allowed in there. 
- if constexpr (std::is_same_v) { - rewriter.setInsertionPoint( - target->template getParentOfType()); - } + // If we are inside a `ParallelCombiningOp` region, temporarily set the + // insertion point outside: only ops implementing ParallelCombiningOpInterface + // are allowed in there. + if (isa(target.getOperation())) + rewriter.setInsertionPoint(target->getParentOp()); Value extracted = tensor::ExtractSliceOp::create( rewriter, target.getLoc(), target.getDest(), target.getMixedOffsets(), diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 84f9777a443fd..45b14fcf8aadd 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -21,6 +21,7 @@ #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Interfaces/FunctionInterfaces.h" +#include "mlir/Interfaces/ParallelCombiningOpInterface.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/MapVector.h" @@ -681,7 +682,9 @@ void mlir::scf::promote(RewriterBase &rewriter, scf::ForallOp forallOp) { results.reserve(forallOp.getResults().size()); for (auto &yieldingOp : terminator.getYieldingOps()) { auto parallelInsertSliceOp = - cast(yieldingOp); + dyn_cast(yieldingOp); + if (!parallelInsertSliceOp) + continue; Value dst = parallelInsertSliceOp.getDest(); Value src = parallelInsertSliceOp.getSource(); @@ -1439,12 +1442,9 @@ InParallelOp ForallOp::getTerminator() { SmallVector ForallOp::getCombiningOps(BlockArgument bbArg) { SmallVector storeOps; - InParallelOp inParallelOp = getTerminator(); - for (Operation &yieldOp : inParallelOp.getYieldingOps()) { - if (auto parallelInsertSliceOp = - dyn_cast(yieldOp); - parallelInsertSliceOp && parallelInsertSliceOp.getDest() == bbArg) { - storeOps.push_back(parallelInsertSliceOp); + for (Operation *user : bbArg.getUsers()) { + if (auto parallelOp = dyn_cast(user)) { + storeOps.push_back(parallelOp); } } return storeOps; @@ -1911,8 
+1911,10 @@ struct FoldTensorCastOfOutputIntoForallOp auto terminator = newForallOp.getTerminator(); for (auto [yieldingOp, outputBlockArg] : llvm::zip( terminator.getYieldingOps(), newForallOp.getRegionIterArgs())) { - auto insertSliceOp = cast(yieldingOp); - insertSliceOp.getDestMutable().assign(outputBlockArg); + if (auto parallelCombingingOp = + dyn_cast(yieldingOp)) { + parallelCombingingOp.getUpdatedDestinations().assign(outputBlockArg); + } } // Cast results back to the original types. @@ -1971,19 +1973,22 @@ LogicalResult InParallelOp::verify() { if (!forallOp) return this->emitOpError("expected forall op parent"); - // TODO: InParallelOpInterface. for (Operation &op : getRegion().front().getOperations()) { - if (!isa(op)) { - return this->emitOpError("expected only ") - << tensor::ParallelInsertSliceOp::getOperationName() << " ops"; + auto parallelCombiningOp = dyn_cast(&op); + if (!parallelCombiningOp) { + return this->emitOpError("expected only ParallelCombiningOpInterface") + << " ops"; } // Verify that inserts are into out block arguments. - Value dest = cast(op).getDest(); + MutableOperandRange dests = parallelCombiningOp.getUpdatedDestinations(); ArrayRef regionOutArgs = forallOp.getRegionOutArgs(); - if (!llvm::is_contained(regionOutArgs, dest)) - return op.emitOpError("may only insert into an output block argument"); + for (OpOperand &dest : dests) { + if (!llvm::is_contained(regionOutArgs, dest.get())) + return op.emitOpError("may only insert into an output block argument"); + } } + return success(); } @@ -2018,12 +2023,17 @@ OpResult InParallelOp::getParentResult(int64_t idx) { } SmallVector InParallelOp::getDests() { - return llvm::to_vector<4>( - llvm::map_range(getYieldingOps(), [](Operation &op) { - // Add new ops here as needed. 
- auto insertSliceOp = cast(&op); - return llvm::cast(insertSliceOp.getDest()); - })); + SmallVector updatedDests; + for (Operation &yieldingOp : getYieldingOps()) { + auto parallelCombiningOp = + dyn_cast(&yieldingOp); + if (!parallelCombiningOp) + continue; + for (OpOperand &updatedOperand : + parallelCombiningOp.getUpdatedDestinations()) + updatedDests.push_back(cast(updatedOperand.get())); + } + return updatedDests; } llvm::iterator_range InParallelOp::getYieldingOps() { diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp index a44612410bdee..63216e7cc7fba 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp @@ -16,7 +16,7 @@ using namespace mlir::bufferization; namespace { /// The `scf.forall.in_parallel` terminator is special in a few ways: /// * It does not implement the BranchOpInterface or -/// RegionBranchTerminatorOpInterface, but the ParallelCombiningOpInterface +/// RegionBranchTerminatorOpInterface, but the InParallelOpInterface /// which is not supported by BufferDeallocation. 
/// * It has a graph-like region which only allows one specific tensor op /// * After bufferization the nested region is always empty @@ -40,9 +40,9 @@ namespace { /// /// } /// ``` -struct InParallelOpInterface - : public BufferDeallocationOpInterface::ExternalModel { +struct InParallelDeallocOpInterface + : public BufferDeallocationOpInterface::ExternalModel< + InParallelDeallocOpInterface, scf::InParallelOp> { FailureOr process(Operation *op, DeallocationState &state, const DeallocationOptions &options) const { auto inParallelOp = cast(op); @@ -75,7 +75,7 @@ struct ReduceReturnOpInterface void mlir::scf::registerBufferDeallocationOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, SCFDialect *dialect) { - InParallelOp::attachInterface(*ctx); + InParallelOp::attachInterface(*ctx); ReduceReturnOp::attachInterface(*ctx); }); } diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 68584ec4fd814..fa97b49a41d97 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -2976,9 +2976,9 @@ class InsertSliceOpConstantArgumentFolder final if (sourceType != insertSliceOp.getSourceType()) { OpBuilder::InsertionGuard g(rewriter); // The only difference between InsertSliceOp and ParallelInsertSliceOp - // is that the insertion point is just before the ParallelCombiningOp in + // is that the insertion point is just before the InParallelOp in // the parallel case. - if (std::is_same::value) + if (isa(insertSliceOp->getParentOp())) rewriter.setInsertionPoint(insertSliceOp->getParentOp()); toInsert = tensor::CastOp::create(rewriter, insertSliceOp.getLoc(), sourceType, toInsert); @@ -3153,9 +3153,9 @@ struct InsertSliceOpSourceCastInserter final // Insert the cast. 
OpBuilder::InsertionGuard g(rewriter); // The only difference between InsertSliceOp and ParallelInsertSliceOp is - // that the insertion point is just before the ParallelCombiningOp in the + // that the insertion point is just before the InParallelOp in the // parallel case. - if (std::is_same::value) + if (isa(insertSliceOp->getParentOp())) rewriter.setInsertionPoint(insertSliceOp->getParentOp()); Value cast = tensor::CastOp::create(rewriter, insertSliceOp.getLoc(), newSrcType, insertSliceOp.getSource()); @@ -3846,8 +3846,7 @@ OpFoldResult PadOp::fold(FoldAdaptor) { //===----------------------------------------------------------------------===// OpResult ParallelInsertSliceOp::getTiedOpResult() { - ParallelCombiningOpInterface parallelCombiningParent = - getParallelCombiningParent(); + InParallelOpInterface parallelCombiningParent = getParallelCombiningParent(); for (const auto &it : llvm::enumerate(parallelCombiningParent.getYieldingOps())) { Operation &nextOp = it.value(); @@ -3901,8 +3900,8 @@ void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result, } LogicalResult ParallelInsertSliceOp::verify() { - if (!isa(getOperation()->getParentOp())) - return this->emitError("expected ParallelCombiningOpInterface parent, got:") + if (!isa(getOperation()->getParentOp())) + return this->emitError("expected InParallelOpInterface parent, got:") << *(getOperation()->getParentOp()); // Verify result type against inferred type. @@ -3935,6 +3934,19 @@ llvm::SmallBitVector ParallelInsertSliceOp::getDroppedDims() { return ::getDroppedDims(getSourceType().getShape(), getMixedSizes()); } +// ParallelCombiningOpInterface implementation. +MutableOperandRange ParallelInsertSliceOp::getUpdatedDestinations() { + return getDestMutable(); +} + +Operation *ParallelInsertSliceOp::getIteratingParent() { + // Return the parent InParallelOpInterface's parent. 
+ if (auto combiningOp = + dyn_cast(getOperation()->getParentOp())) + return combiningOp->getParentOp(); + return nullptr; +} + //===----------------------------------------------------------------------===// // ScatterOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index c3356c1e4b9d8..bce964e47a3be 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -970,10 +970,10 @@ struct ParallelInsertSliceOpInterface BufferizationState &state) const { OpBuilder::InsertionGuard g(rewriter); auto parallelInsertSliceOp = cast(op); - ParallelCombiningOpInterface parallelCombiningParent = + InParallelOpInterface parallelCombiningParent = parallelInsertSliceOp.getParallelCombiningParent(); - // Bufferize the op outside of the parallel combining terminator. + // Bufferize the op outside of the in parallel terminator. rewriter.setInsertionPoint(parallelCombiningParent); // Get source and destination buffers. diff --git a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp index d76c02af7ab16..b32faf481af80 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp @@ -215,12 +215,11 @@ struct InsertSliceOfInsertSliceFolder : public OpRewritePattern { sourceInsertSliceOp.getMixedSizes(), droppedDims, resolvedSizes); - // If we are inside an InParallel region, temporarily set the insertion - // point outside: only tensor.parallel_insert_slice ops are allowed in - // there. 
- if (std::is_same_v) { - rewriter.setInsertionPoint( - insertSliceOp->template getParentOfType()); + // If we are inside a ParallelCombining region, temporarily set the + // insertion point outside: only ops of ParallelCombiningOpInterface are + // allowed in there. + if (isa(insertSliceOp.getOperation())) { + rewriter.setInsertionPoint(insertSliceOp->getParentOp()); } // Resolve offsets according to source offsets and strides. diff --git a/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp b/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp index 2b6703543bbd3..30b8191bf34b0 100644 --- a/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp +++ b/mlir/lib/Interfaces/ParallelCombiningOpInterface.cpp @@ -11,11 +11,11 @@ using namespace mlir; //===----------------------------------------------------------------------===// -// ParallelCombiningOpInterface +// InParallelOpInterface (formerly ParallelCombiningOpInterface) //===----------------------------------------------------------------------===// // TODO: Single region single block interface on interfaces ? 
-LogicalResult mlir::detail::verifyParallelCombiningOpInterface(Operation *op) { +LogicalResult mlir::detail::verifyInParallelOpInterface(Operation *op) { if (op->getNumRegions() != 1) return op->emitError("expected single region op"); if (!op->getRegion(0).hasOneBlock()) diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index 5f42938244db6..9005110205630 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -915,7 +915,7 @@ func.func @sparse_case(%arg0: tensor<8x8xf32, #CSR>, %arg1: tensor<8xf32>) -> te // ----- -func.func @reduce_dispatch_0() -> tensor<4x2xf32> { +func.func @parallel_insert_slice() -> tensor<4x2xf32> { %c2 = arith.constant 2 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.000000e+00 : f32 @@ -923,6 +923,7 @@ func.func @reduce_dispatch_0() -> tensor<4x2xf32> { %res = scf.forall (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) { %1 = tensor.empty() : tensor<1x1xf32> %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + // CHECK: scf.forall.in_parallel scf.forall.in_parallel { // CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}} // CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<4x2xf32> diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir index bb7958083e55c..37fc86b18e7f0 100644 --- a/mlir/test/Dialect/SCF/invalid.mlir +++ b/mlir/test/Dialect/SCF/invalid.mlir @@ -645,7 +645,7 @@ func.func @wrong_terminator_op(%in: tensor<100xf32>, %out: tensor<100xf32>) { %result = scf.forall (%thread_idx) in (%num_threads) shared_outs(%o = %out) -> (tensor<100xf32>) { %1 = tensor.extract_slice %in[%thread_idx][1][1] : tensor<100xf32> to tensor<1xf32> - // expected-error @+1 {{expected only tensor.parallel_insert_slice ops}} + // expected-error @+1 {{expected only ParallelCombiningOpInterface 
ops}} scf.forall.in_parallel { tensor.parallel_insert_slice %1 into %o[%thread_idx][1][1] : tensor<1xf32> into tensor<100xf32> diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir index 9bb87ffbb2090..ed3685514dd0d 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -908,3 +908,111 @@ func.func @parallel_region_no_read() } return } + +// ----- + +// CHECK-LABEL: func @in_order_multiple_parallel_writes +func.func @in_order_multiple_parallel_writes(%2: tensor<320xf32> {bufferization.writable = true}, + %3: tensor<320xf32> {bufferization.writable = true}) + -> (tensor<320xf32>, tensor<320xf32>) +{ + %c0 = arith.constant 0 : index + %cst = arith.constant -0.000000e+00 : f32 + %c320 = arith.constant 320 : index + %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) { + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} + %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} + %7 = tensor.extract_slice %arg2[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> + // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} + %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32> + + // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} + // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} + scf.forall.in_parallel { + tensor.parallel_insert_slice %6 into %arg2[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32> + tensor.parallel_insert_slice %8 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32> + } + } + return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32> +} + +// ----- + +// CHECK-LABEL: 
func @out_of_order_parallel_write +func.func @out_of_order_parallel_write(%2: tensor<320xf32> {bufferization.writable = true}, + %3: tensor<320xf32> {bufferization.writable = true}) + -> (tensor<320xf32>, tensor<320xf32>) +{ + %c0 = arith.constant 0 : index + %cst = arith.constant -0.000000e+00 : f32 + %c320 = arith.constant 320 : index + %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) { + // The extract_slice cannot operate in place because it is used after the + // first write. + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} + %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> + + // Additionally the fill aliases the thread local slice. + // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]} + %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32> + + scf.forall.in_parallel { + // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} + tensor.parallel_insert_slice %7 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32> + // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} + tensor.parallel_insert_slice %6 into %arg2[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32> + } + } + return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32> +} + +// ----- + +// CHECK-LABEL: func @out_of_order_parallel_write +func.func @out_of_order_parallel_write_multiple_reads(%2: tensor<320xf32> {bufferization.writable = true}, + %3: tensor<320xf32> {bufferization.writable = true}) + -> (tensor<320xf32>, tensor<320xf32>) +{ + %c0 = arith.constant 0 : index + %cst = arith.constant -0.000000e+00 : f32 + %c320 = arith.constant 320 : index + %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) { + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = 
["false", "none"]} + %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> + // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} + %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32> + + %reverse = arith.subi %c320, %arg0 : index + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} + %8 = tensor.extract_slice %arg1[%reverse] [1] [1] : tensor<320xf32> to tensor<1xf32> + scf.forall.in_parallel { + // Also cannot operate in place due to subsequent conflicting reads. + // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} + tensor.parallel_insert_slice %7 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32> + // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} + tensor.parallel_insert_slice %8 into %arg2[%reverse] [1] [1] : tensor<1xf32> into tensor<320xf32> + } + } + return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32> +} +// ----- + +// CHECK-LABEL: func @in_order_multiple_parallel_writes +func.func @in_order_multiple_parallel_writes(%2: tensor<320xf32> {bufferization.writable = true}) + -> (tensor<320xf32>) +{ + %c0 = arith.constant 0 : index + %cst = arith.constant -0.000000e+00 : f32 + %c320 = arith.constant 320 : index + %4 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2) -> (tensor<320xf32>) { + // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} + %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> + %reverse = arith.subi %c320, %arg0 : index + // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} + scf.forall.in_parallel { + tensor.parallel_insert_slice %6 into %arg1[%reverse] [1] [1] : tensor<1xf32> into tensor<320xf32> + } + } + return %4 : tensor<320xf32> +} diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir 
b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir index 8f4b924cfd3cc..92486b8ed7208 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir @@ -112,7 +112,7 @@ func.func @scf_while_non_equiv_condition_and_body(%A: tensor<5xi1>, // CHECK-SAME: %[[arg0:.*]]: tensor<100xf32>, %[[arg1:.*]]: tensor<100xf32> // CHECK-FUNC-LABEL: func @scf_forall_out_of_place( func.func @scf_forall_out_of_place(%in: tensor<100xf32>, - %out: tensor<100xf32>) { + %out: tensor<100xf32>) { %c1 = arith.constant 1 : index %num_threads = arith.constant 100 : index @@ -132,3 +132,31 @@ func.func @scf_forall_out_of_place(%in: tensor<100xf32>, } {mapping = [#gpu.thread]} return } + +// ----- + +// CHECK-LABEL: func @in_order_multiple_parallel_writes +func.func @in_order_multiple_parallel_writes(%2: tensor<320xf32>, + %3: tensor<320xf32>) + -> (tensor<320xf32>, tensor<320xf32>) +{ + %c0 = arith.constant 0 : index + %cst = arith.constant -0.000000e+00 : f32 + %c320 = arith.constant 320 : index + %4:2 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %2, %arg2 = %3) -> (tensor<320xf32>, tensor<320xf32>) { + // CHECK: tensor.extract_slice {{.*}} + %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> + // CHECK: tensor.extract_slice {{.*}} + %7 = tensor.extract_slice %arg2[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> + // CHECK: linalg.fill {{.*}} + %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32> + + // CHECK: tensor.parallel_insert_slice {{.*}} + // CHECK: tensor.parallel_insert_slice {{.*}} + scf.forall.in_parallel { + tensor.parallel_insert_slice %6 into %arg2[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32> + tensor.parallel_insert_slice %8 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32> + } + } + return %4#0, %4#1 : tensor<320xf32>, tensor<320xf32> +} diff --git 
a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 5042198d78b74..66cb7956c89f2 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10819,6 +10819,7 @@ cc_library( ":LinalgTransformOpsIncGen", ":LinalgTransforms", ":LinalgUtils", + ":ParallelCombiningOpInterface", ":SCFDialect", ":SCFTransforms", ":Support", From 7f2e9b17098f42c85ef469b029bb84ef4eea189c Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 12 Sep 2025 21:24:15 +0000 Subject: [PATCH 182/734] Revert "[libc++] Mark __{emplace,push}_back_slow_path as noinline (#94379)" This reverts commit 1bafd020c7c80be476f211bc239ce43424f7e0ce. This breaks the LLDB data formatters which means these failures show up on every premerge run. Reverting for now until fixing the LLDB formatters can be coordinated with a relanding. --- libcxx/include/__vector/vector.h | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h index 27e681aeef22a..5e6572b1a82c4 100644 --- a/libcxx/include/__vector/vector.h +++ b/libcxx/include/__vector/vector.h @@ -1161,24 +1161,6 @@ vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) { return this->__end_; } -// This makes the compiler inline `__else()` if `__cond` is known to be false. Currently LLVM doesn't do that without -// the `__builtin_constant_p`, since it considers `__else` unlikely even through it's known to be run. 
-// See https://llvm.org/PR154292 -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __if_likely_else(bool __cond, _If __if, _Else __else) { - if (__builtin_constant_p(__cond)) { - if (__cond) - __if(); - else - __else(); - } else { - if (__cond) [[__likely__]] - __if(); - else - __else(); - } -} - template template _LIBCPP_CONSTEXPR_SINCE_CXX20 inline @@ -1189,14 +1171,12 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 inline #endif vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) { pointer __end = this->__end_; - std::__if_likely_else( - __end < this->__cap_, - [&] { - __emplace_back_assume_capacity(std::forward<_Args>(__args)...); - ++__end; - }, - [&] { __end = __emplace_back_slow_path(std::forward<_Args>(__args)...); }); - + if (__end < this->__cap_) { + __emplace_back_assume_capacity(std::forward<_Args>(__args)...); + ++__end; + } else { + __end = __emplace_back_slow_path(std::forward<_Args>(__args)...); + } this->__end_ = __end; #if _LIBCPP_STD_VER >= 17 return *(__end - 1); From 32620c58ac72727083d8ef310572970a8b11511d Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 12 Sep 2025 21:27:13 +0000 Subject: [PATCH 183/734] [lit] Add missing split-file dependency There was a recent patch that added in some tests to the lit test suite that use split-file. An explicit dependency in CMake was not added, which led to check-lit not working if being run without doing a full build first. This patch explicitly adds the dependency inside the CMake file to fix this configuration. 
--- llvm/utils/lit/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/lit/CMakeLists.txt b/llvm/utils/lit/CMakeLists.txt index d22a778e2e531..97b1d7c022fd5 100644 --- a/llvm/utils/lit/CMakeLists.txt +++ b/llvm/utils/lit/CMakeLists.txt @@ -22,7 +22,7 @@ add_custom_target(prepare-check-lit # Add rules for lit's own test suite add_lit_testsuite(check-lit "Running lit's tests" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS "FileCheck" "not" "prepare-check-lit" + DEPENDS "FileCheck" "not" "split-file" "prepare-check-lit" ) # For IDEs From 8e17f80908abd5a22acf962584371b71dffe6d15 Mon Sep 17 00:00:00 2001 From: Nishant Patel Date: Fri, 12 Sep 2025 14:33:52 -0700 Subject: [PATCH 184/734] [MLIR][XeGPU] Distribute vector.step & vector.shape_cast op from wg to sg (#155443) This PR adds patterns to distribute vector.step and vector.shape_cast op from wg to sg and it also enables constant, broadcast and elementwise ops to handle the slice attribute --- .../Transforms/XeGPUWgToSgDistribute.cpp | 180 +++++++++++++++--- .../XeGPU/xegpu-wg-to-sg-unify-ops.mlir | 59 ++++++ 2 files changed, 208 insertions(+), 31 deletions(-) diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 3f48400fedf5e..d7592fed6d186 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -468,6 +468,7 @@ struct WgToSgVectorBroadcastOp LogicalResult matchAndRewrite(vector::BroadcastOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { + VectorType resultType = op.getResult().getType(); ArrayRef wgShape = resultType.getShape(); @@ -476,43 +477,24 @@ struct WgToSgVectorBroadcastOp if (!layout || !layout.isForWorkgroup()) return failure(); - // TODO: Currently only supports cases where the source and result ranks - // are the same. 
- auto srcType = - dyn_cast(adaptor.getOperands().front()[0].getType()); - if (!srcType || srcType.getRank() != resultType.getRank()) - return failure(); - SmallVector sgShape = getSgShapeAndCount(wgShape, layout).first; VectorType newResultType = VectorType::get(sgShape, resultType.getElementType()); - // Check if the output layout is distributable - SmallVector sgLayout = layout.getEffectiveSgLayoutAsInt(); - if (sgLayout.empty()) - return failure(); - if (!xegpu::XeGPUDialect::isEvenlyDistributable(wgShape, layout)) return failure(); - // Check if the srcShape has unit dim in dimensions being broadcasted, - // and the other dimensions are the same as the destination type - // TODO: Generalize it - auto srcShape = srcType.getShape(); - for (size_t i = 0; i < srcShape.size(); ++i) { - if (srcShape[i] != 1 && srcShape[i] != sgShape[i]) - return failure(); - } - SmallVector newBroadcastOps; for (auto operand : adaptor.getOperands().front()) { auto newBroadcast = vector::BroadcastOp::create(rewriter, op.getLoc(), newResultType, operand); - xegpu::setDistributeLayoutAttr(newBroadcast->getResult(0), - layout.dropSgLayoutAndData()); + if (!layout.getEffectiveLaneLayoutAsInt().empty() || + !layout.getEffectiveInstDataAsInt().empty()) + xegpu::setDistributeLayoutAttr(newBroadcast->getResult(0), + layout.dropSgLayoutAndData()); + newBroadcastOps.push_back(newBroadcast.getResult()); } - rewriter.replaceOpWithMultiple(op, {newBroadcastOps}); return success(); } @@ -564,9 +546,11 @@ struct WgToSgElementwiseOp : public ConversionPattern { // Copy all attributes, but update "layout_result_0" to drop // sgLayout/sgData for (auto attr : op->getAttrs()) { - if (auto layout = dyn_cast(attr.getValue())) { - if (auto newLayout = layout.dropSgLayoutAndData()) - state.addAttribute(attr.getName(), newLayout); + if (auto layout = + dyn_cast(attr.getValue())) { + if (!layout.getEffectiveLaneLayoutAsInt().empty() || + !layout.getEffectiveInstDataAsInt().empty()) + 
state.addAttribute(attr.getName(), layout.dropSgLayoutAndData()); } else { state.addAttribute(attr.getName(), attr.getValue()); } @@ -757,8 +741,10 @@ struct WgToSgArithConstantOp : public OpConversionPattern { auto sgAttr = DenseElementsAttr::get(newType, singleVal); auto cstOp = arith::ConstantOp::create(rewriter, op.getLoc(), newType, sgAttr); - if (auto newLayout = layout.dropSgLayoutAndData()) - xegpu::setDistributeLayoutAttr(cstOp->getResult(0), newLayout); + if (!layout.getEffectiveLaneLayoutAsInt().empty() || + !layout.getEffectiveInstDataAsInt().empty()) + xegpu::setDistributeLayoutAttr(cstOp->getResult(0), + layout.dropSgLayoutAndData()); SmallVector newConsts(count, cstOp); rewriter.replaceOpWithMultiple(op, {newConsts}); @@ -919,6 +905,128 @@ struct WgToSgStoreMatrixOp : public OpConversionPattern { } }; +// This pattern distributes the vector.step ops to work at subgroup level +struct WgToSgVectorStepOp : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(vector::StepOp op, OneToNOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(op.getResult()); + if (!layout || !layout.isForWorkgroup()) + return failure(); + + Location loc = op.getLoc(); + VectorType type = op.getResult().getType(); + auto wgShape = type.getShape(); + std::optional> sgShape = + getSgShapeAndCount(wgShape, layout).first; + if (!sgShape) + return failure(); + + Value sgId = + gpu::SubgroupIdOp::create(rewriter, loc, /*upper_bound=*/nullptr); + auto sgOffsets = layout.getOffsets(rewriter, loc, sgId, wgShape); + if (failed(sgOffsets)) + return failure(); + + VectorType newTy = type.cloneWith(*sgShape, type.getElementType()); + auto steps = vector::StepOp::create(rewriter, loc, newTy); + SmallVector newOps; + for (auto offsets : *sgOffsets) { + // Broadcast the offset scalar to a vector & add to the base steps + auto bcastOffset = + 
vector::BroadcastOp::create(rewriter, loc, newTy, offsets[0]); + auto finalSteps = + arith::AddIOp::create(rewriter, loc, steps, bcastOffset); + if (!layout.getEffectiveLaneLayoutAsInt().empty() || + !layout.getEffectiveInstDataAsInt().empty()) { + xegpu::setDistributeLayoutAttr(steps->getResult(0), + layout.dropSgLayoutAndData()); + xegpu::setDistributeLayoutAttr(bcastOffset->getResult(0), + layout.dropSgLayoutAndData()); + xegpu::setDistributeLayoutAttr(finalSteps->getResult(0), + layout.dropSgLayoutAndData()); + } + newOps.push_back(finalSteps); + } + + rewriter.replaceOpWithMultiple(op, {newOps}); + return success(); + } +}; + +// This pattern transforms vector.shape_cast ops to work at subgroup level. +struct WgToSgVectorShapeCastOp + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(vector::ShapeCastOp op, OneToNOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + VectorType resultType = dyn_cast(op.getResult().getType()); + if (!resultType) + return failure(); + + ArrayRef wgShape = resultType.getShape(); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(op.getResult()); + if (!layout || !layout.isForWorkgroup()) + return failure(); + + SmallVector sgShape = getSgShapeAndCount(wgShape, layout).first; + VectorType newResultType = + VectorType::get(sgShape, resultType.getElementType()); + + // TODO: Add check for compatible layouts in layout attr. + auto srcType = dyn_cast(adaptor.getSource()[0].getType()); + if (!srcType) + return failure(); + + // Check that shape_cast only adds/removes unit dimensions, + auto onlyUnitDims = [](ArrayRef src, ArrayRef dst) { + // Remove all 1s from both shapes and compare the rest. 
+ SmallVector srcNonUnit, dstNonUnit; + for (int64_t d : src) + if (d != 1) + srcNonUnit.push_back(d); + for (int64_t d : dst) + if (d != 1) + dstNonUnit.push_back(d); + return srcNonUnit == dstNonUnit; + }; + + if (!onlyUnitDims(srcType.getShape(), sgShape)) + return failure(); + + // For rank reducing or increasing shape_cast ops, the lower rank layout + // must be a slice of higher rank layout. + int64_t sourceRank = srcType.getRank(); + int64_t resultRank = sgShape.size(); + xegpu::DistributeLayoutAttr sourceLayout = + xegpu::getDistributeLayoutAttr(op.getSource()); + if (sourceRank < resultRank && !sourceLayout.isSliceOf(layout)) + return failure(); + if (sourceRank > resultRank && !layout.isSliceOf(sourceLayout)) + return failure(); + + SmallVector newShapeCastOps; + for (auto src : adaptor.getSource()) { + auto newShapeCast = + rewriter.create(op.getLoc(), newResultType, src); + if (!layout.getEffectiveLaneLayoutAsInt().empty() || + !layout.getEffectiveInstDataAsInt().empty()) + xegpu::setDistributeLayoutAttr(newShapeCast->getResult(0), + layout.dropSgLayoutAndData()); + newShapeCastOps.push_back(newShapeCast.getResult()); + } + + rewriter.replaceOpWithMultiple(op, {newShapeCastOps}); + return success(); + } +}; + } // namespace namespace mlir { @@ -932,7 +1040,8 @@ void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns) { WgToSgElementwiseOp, WgToSgVectorBroadcastOp, WgToSgConvertLayoutOp, WgToSgArithConstantOp, WgToSgLoadGatherOpWithOffset, WgToSgStoreScatterOpWithOffset, WgToSgLoadMatrixOp, - WgToSgStoreMatrixOp>(patterns.getContext()); + WgToSgStoreMatrixOp, WgToSgVectorStepOp, WgToSgVectorShapeCastOp>( + patterns.getContext()); } } // namespace xegpu } // namespace mlir @@ -1054,7 +1163,16 @@ void XeGPUWgToSgDistributePass::runOnOperation() { auto vecType = dyn_cast(op.getType()); if (!vecType) return true; - return isLegal(xegpu::getDistributeLayoutAttr(op.getResult())); + + auto layout = xegpu::getDistributeLayoutAttr(op.getResult()); 
+ return isLegal(layout); + }); + + target.addDynamicallyLegalOp( + [=](Operation *op) -> bool { + // Check for either a SliceAttr or LayoutAttr on the result. + auto layout = xegpu::getDistributeLayoutAttr(op->getResult(0)); + return isLegal(layout); }); target.addDynamicallyLegalOp( diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir index afb2bf876c18f..3478a9b91da5f 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir @@ -2,6 +2,7 @@ //CHECK: #map = affine_map<()[s0] -> (s0 floordiv 4)> //CHECK: #map1 = affine_map<()[s0] -> (s0 mod 4)> +//CHECK: #map2 = affine_map<()[s0] -> (s0 floordiv 8)> gpu.module @test_distribution { // CHECK-LABEL: create_nd_tdesc_no_offset // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32> @@ -365,4 +366,62 @@ gpu.module @test_distribution { xegpu.store_matrix %cst, %mdesc[0, 0] {layout = #xegpu.layout} : vector<64x128xf32>, !xegpu.mem_desc<64x128xf32> gpu.return } + + // CHECK-LABEL: vector_step_op + gpu.func @vector_step_op_slice_attr() { + //CHECK: [[sgId:%.+]] = gpu.subgroup_id : index + //CHECK-DAG: [[IDY:%.+]] = affine.apply #map2()[[[sgId]]] + //CHECK-DAG: [[c32:%.+]] = arith.constant 32 : index + //CHECK-DAG: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]] + //CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index + //CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index + //CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index + //CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]] + //CHECK-DAG: [[BASE:%.+]] = vector.step : vector<32xindex> + //CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex> + //CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex> + %step = vector.step {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1]>}: vector<128xindex> + gpu.return + } + + gpu.func @vector_step_op_layout_attr() { + //CHECK: [[sgId:%.+]] = 
gpu.subgroup_id : index + //CHECK-DAG: [[c16:%.+]] = arith.constant 16 : index + //CHECK-DAG: [[c8:%.+]] = arith.constant 8 : index + //CHECK-DAG: [[LOCALY:%.+]] = index.mul [[sgId]], [[c8]] + //CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index + //CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index + //CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index + //CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]] + //CHECK-DAG: [[BASE:%.+]] = vector.step : vector<8xindex> + //CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<8xindex> + //CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<8xindex> + %step = vector.step {layout_result_0 = #xegpu.layout}: vector<128xindex> + gpu.return + } + + // CHECK-LABEL: constant_with_slice_attr + gpu.func @constant_with_slice_attr() { + //CHECK: [[cst:%.+]] = arith.constant dense<10> : vector<1xindex> + %cst = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1, 2, 3]>} dense<10> : vector<4xindex> + gpu.return + } + + // CHECK-LABEL: vector_shape_cast + gpu.func @vector_shape_cast() { + %cst = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0, 1, 2]>} dense<10> : vector<128xindex> + %step = vector.step {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0, 1, 2]>} : vector<128xindex> + %muli = arith.muli %cst, %step {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0, 1, 2]>} : vector<128xindex> + //CHECK: vector.shape_cast {{.*}} : vector<32xindex> to vector<1x1x1x32xindex> + %shape_cast = vector.shape_cast %muli {layout_result_0 = #xegpu.layout} : vector<128xindex> to vector<1x1x1x128xindex> + gpu.return + } + + // CHECK-LABEL: vector_broadcast + gpu.func @vector_broadcast(%arg0: index, %arg1: index) { + %muli = arith.muli %arg0, %arg1 : index + // CHECK: vector.broadcast {{.*}} : index to vector<1x1x1x32xindex> + %broadcast = vector.broadcast %muli {layout_result_0 = #xegpu.layout} : index to vector<4x2x6x32xindex> + gpu.return + } } From 
f019e2368b137371d248a7ddbe37f76466c2d44d Mon Sep 17 00:00:00 2001 From: lntue Date: Fri, 12 Sep 2025 17:57:08 -0400 Subject: [PATCH 185/734] [libc] Change __builtin_memcpy to inline_memcpy. (#158345) --- libc/src/__support/CMakeLists.txt | 1 + libc/src/__support/arg_list.h | 3 ++- libc/src/stdio/printf_core/CMakeLists.txt | 1 + .../stdio/printf_core/float_dec_converter_limited.h | 5 +++-- libc/src/stdlib/CMakeLists.txt | 1 + libc/src/stdlib/qsort_data.h | 11 ++++++----- libc/src/string/CMakeLists.txt | 1 + libc/src/string/stpcpy.cpp | 3 ++- libc/src/string/string_utils.h | 3 ++- libc/src/wchar/CMakeLists.txt | 1 + libc/src/wchar/wcpcpy.cpp | 2 +- libc/src/wchar/wcscpy.cpp | 2 +- libc/src/wchar/wmemcpy.cpp | 3 ++- libc/src/wchar/wmempcpy.cpp | 3 ++- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 4 ++++ 15 files changed, 30 insertions(+), 14 deletions(-) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index b6e87ac336fb2..0ef09a9b8c9d0 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -302,6 +302,7 @@ add_header_library( DEPENDS libc.hdr.stdint_proxy libc.src.__support.common + libc.src.string.memory_utils.inline_memcpy ) add_header_library( diff --git a/libc/src/__support/arg_list.h b/libc/src/__support/arg_list.h index 1e26a5e8ef9c7..7b78a9c0fe619 100644 --- a/libc/src/__support/arg_list.h +++ b/libc/src/__support/arg_list.h @@ -12,6 +12,7 @@ #include "hdr/stdint_proxy.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/string/memory_utils/inline_memcpy.h" #include #include @@ -126,7 +127,7 @@ template class StructArgList { // Memcpy because pointer alignment may be illegal given a packed struct. 
T val; - __builtin_memcpy(&val, ptr, sizeof(T)); + inline_memcpy(&val, ptr, sizeof(T)); ptr = reinterpret_cast(reinterpret_cast(ptr) + sizeof(T)); diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 76eb0a2fdaaa5..ee66145e60156 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -112,6 +112,7 @@ add_header_library( libc.src.__support.libc_assert libc.src.__support.uint128 libc.src.__support.StringUtil.error_to_string + libc.src.string.memory_utils.inline_memcpy ) add_header_library( diff --git a/libc/src/stdio/printf_core/float_dec_converter_limited.h b/libc/src/stdio/printf_core/float_dec_converter_limited.h index f468dbc8e2ae8..9cdc13573d320 100644 --- a/libc/src/stdio/printf_core/float_dec_converter_limited.h +++ b/libc/src/stdio/printf_core/float_dec_converter_limited.h @@ -53,6 +53,7 @@ #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/float_inf_nan_converter.h" #include "src/stdio/printf_core/writer.h" +#include "src/string/memory_utils/inline_memcpy.h" namespace LIBC_NAMESPACE_DECL { namespace printf_core { @@ -250,7 +251,7 @@ DigitsOutput decimal_digits(DigitsInput input, int precision, bool e_mode) { // there's space for it in the DigitsOutput buffer). DigitsOutput output; output.ndigits = view.size(); - __builtin_memcpy(output.digits, view.data(), output.ndigits); + inline_memcpy(output.digits, view.data(), output.ndigits); // Set up the output exponent, which is done differently depending on mode. // Also, figure out whether we have one digit too many, and if so, set the @@ -551,7 +552,7 @@ convert_float_inner(Writer *writer, const FormatSection &to_conv, cpp::string_view expview = expcvt.view(); expbuf[0] = internal::islower(to_conv.conv_name) ? 
'e' : 'E'; explen = expview.size() + 1; - __builtin_memcpy(expbuf + 1, expview.data(), expview.size()); + inline_memcpy(expbuf + 1, expview.data(), expview.size()); } // Now we know enough to work out the length of the unpadded output: diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index aa653c38a8c3f..c464f82dcbda7 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -292,6 +292,7 @@ add_header_library( libc.hdr.stdint_proxy libc.include.stdlib libc.src.__support.CPP.cstddef + libc.src.string.memory_utils.inline_memcpy ) add_entrypoint_object( diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h index 739fce88ab75d..4f9774088fbd3 100644 --- a/libc/src/stdlib/qsort_data.h +++ b/libc/src/stdlib/qsort_data.h @@ -12,6 +12,7 @@ #include "hdr/stdint_proxy.h" #include "src/__support/CPP/cstddef.h" #include "src/__support/macros/config.h" +#include "src/string/memory_utils/inline_memcpy.h" namespace LIBC_NAMESPACE_DECL { namespace internal { @@ -54,9 +55,9 @@ class ArrayGenericSize { const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem); while (elem_i != elem_i_block_end) { - __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE); - __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE); - __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE); + inline_memcpy(tmp_block, elem_i, BLOCK_SIZE); + inline_memcpy(elem_i, elem_j, BLOCK_SIZE); + inline_memcpy(elem_j, tmp_block, BLOCK_SIZE); elem_i += BLOCK_SIZE; elem_j += BLOCK_SIZE; @@ -112,9 +113,9 @@ template class ArrayFixedSize { cpp::byte *elem_i = get_internal(i); cpp::byte *elem_j = get_internal(j); - __builtin_memcpy(tmp, elem_i, ELEM_SIZE); + inline_memcpy(tmp, elem_i, ELEM_SIZE); __builtin_memmove(elem_i, elem_j, ELEM_SIZE); - __builtin_memcpy(elem_j, tmp, ELEM_SIZE); + inline_memcpy(elem_j, tmp, ELEM_SIZE); } LIBC_INLINE size_t len() const { return array_len; } diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 
5c9f622d44397..b8cdb2a7d3538 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -22,6 +22,7 @@ add_header_library( libc.src.__support.CPP.type_traits libc.src.__support.CPP.simd libc.src.__support.common + libc.src.string.memory_utils.inline_memcpy ${string_config_options} ) diff --git a/libc/src/string/stpcpy.cpp b/libc/src/string/stpcpy.cpp index 48c0db950ace0..fefae81172585 100644 --- a/libc/src/string/stpcpy.cpp +++ b/libc/src/string/stpcpy.cpp @@ -8,6 +8,7 @@ #include "src/string/stpcpy.h" #include "src/__support/macros/config.h" +#include "src/string/memory_utils/inline_memcpy.h" #include "src/string/string_utils.h" #include "src/__support/common.h" @@ -17,7 +18,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(char *, stpcpy, (char *__restrict dest, const char *__restrict src)) { size_t size = internal::string_length(src) + 1; - __builtin_memcpy(dest, src, size); + inline_memcpy(dest, src, size); char *result = dest + size; if (result != nullptr) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 10803488b6cf5..9d636d02f4756 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -21,6 +21,7 @@ #include "src/__support/CPP/type_traits.h" // cpp::is_same_v #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/string/memory_utils/inline_memcpy.h" #if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) #if LIBC_HAS_VECTOR_TYPE @@ -242,7 +243,7 @@ LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src, if (!size) return len; size_t n = len < size - 1 ? 
len : size - 1; - __builtin_memcpy(dst, src, n); + inline_memcpy(dst, src, n); dst[n] = '\0'; return len; } diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 9ba0a06c57b7f..adde382bf0950 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -452,6 +452,7 @@ add_entrypoint_object( DEPENDS libc.hdr.types.size_t libc.hdr.wchar_macros + libc.src.string.memory_utils.inline_memcpy ) add_entrypoint_object( diff --git a/libc/src/wchar/wcpcpy.cpp b/libc/src/wchar/wcpcpy.cpp index 9e2b12f09eb05..b6d80d4d671d9 100644 --- a/libc/src/wchar/wcpcpy.cpp +++ b/libc/src/wchar/wcpcpy.cpp @@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(wchar_t *, wcpcpy, (wchar_t *__restrict s1, const wchar_t *__restrict s2)) { size_t size = internal::string_length(s2); - __builtin_memcpy(s1, s2, (size + 1) * sizeof(wchar_t)); + inline_memcpy(s1, s2, (size + 1) * sizeof(wchar_t)); wchar_t *result = s1 + size; return result; } diff --git a/libc/src/wchar/wcscpy.cpp b/libc/src/wchar/wcscpy.cpp index 01ba994cecbb2..703706e6a7be8 100644 --- a/libc/src/wchar/wcscpy.cpp +++ b/libc/src/wchar/wcscpy.cpp @@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(wchar_t *, wcscpy, (wchar_t *__restrict s1, const wchar_t *__restrict s2)) { size_t size = internal::string_length(s2) + 1; - __builtin_memcpy(s1, s2, size * sizeof(wchar_t)); + inline_memcpy(s1, s2, size * sizeof(wchar_t)); return s1; } diff --git a/libc/src/wchar/wmemcpy.cpp b/libc/src/wchar/wmemcpy.cpp index bf92309b20944..56708d6cee496 100644 --- a/libc/src/wchar/wmemcpy.cpp +++ b/libc/src/wchar/wmemcpy.cpp @@ -12,13 +12,14 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/string/memory_utils/inline_memcpy.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(wchar_t *, wmemcpy, (wchar_t *__restrict s1, const wchar_t *__restrict s2, size_t n)) { - __builtin_memcpy(s1, s2, n * 
sizeof(wchar_t)); + inline_memcpy(s1, s2, n * sizeof(wchar_t)); return s1; } diff --git a/libc/src/wchar/wmempcpy.cpp b/libc/src/wchar/wmempcpy.cpp index 21e16210a757a..d8b89c0a88d05 100644 --- a/libc/src/wchar/wmempcpy.cpp +++ b/libc/src/wchar/wmempcpy.cpp @@ -11,13 +11,14 @@ #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/common.h" +#include "src/string/memory_utils/inline_memcpy.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(wchar_t *, wmempcpy, (wchar_t *__restrict to, const wchar_t *__restrict from, size_t size)) { - __builtin_memcpy(to, from, size * sizeof(wchar_t)); + inline_memcpy(to, from, size * sizeof(wchar_t)); return reinterpret_cast(to) + size; } diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index d9b1bb5635aaf..a955f7f4916ac 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -818,6 +818,7 @@ libc_support_library( hdrs = ["src/__support/arg_list.h"], deps = [ ":__support_common", + ":string_memory_utils", ], ) @@ -5003,6 +5004,7 @@ libc_support_library( ":__support_cpp_bit", ":__support_cpp_cstddef", ":__support_macros_attributes", + ":string_memory_utils", ], ) @@ -6945,6 +6947,7 @@ libc_function( deps = [ ":__support_common", ":__support_macros_config", + ":string_memory_utils", ":types_size_t", ":types_wchar_t", ], @@ -6968,6 +6971,7 @@ libc_function( hdrs = ["src/wchar/wmempcpy.h"], deps = [ ":__support_common", + ":string_memory_utils", ":types_size_t", ":types_wchar_t", ], From 9e33997242800d49964bfbd056288cbb0cf073ed Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 12 Sep 2025 15:04:38 -0700 Subject: [PATCH 186/734] [IR] Add `MD_prof` to the `Keep` list of `dropUBImplyingAttrsAndMetadata` (#154635) `MD_prof` is safe to keep when e.g. hoisting instructions. 
Issue #147390 --- llvm/lib/IR/Instruction.cpp | 7 ++++--- llvm/lib/Transforms/Scalar/LICM.cpp | 5 +---- .../Transforms/SimplifyCFG/PhiBlockMerge.ll | 21 ++++++++++++------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 5e87b5ff941ad..c1fafd759b5ab 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -553,16 +553,17 @@ void Instruction::dropUBImplyingAttrsAndUnknownMetadata( } void Instruction::dropUBImplyingAttrsAndMetadata(ArrayRef Keep) { - // !annotation metadata does not impact semantics. + // !annotation and !prof metadata does not impact semantics. // !range, !nonnull and !align produce poison, so they are safe to speculate. // !noundef and various AA metadata must be dropped, as it generally produces // immediate undefined behavior. static const unsigned KnownIDs[] = { LLVMContext::MD_annotation, LLVMContext::MD_range, - LLVMContext::MD_nonnull, LLVMContext::MD_align}; + LLVMContext::MD_nonnull, LLVMContext::MD_align, LLVMContext::MD_prof}; SmallVector KeepIDs; KeepIDs.reserve(Keep.size() + std::size(KnownIDs)); - append_range(KeepIDs, KnownIDs); + append_range(KeepIDs, (!ProfcheckDisableMetadataFixes ? KnownIDs + : drop_end(KnownIDs))); append_range(KeepIDs, Keep); dropUBImplyingAttrsAndUnknownMetadata(KeepIDs); } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 40104e8fb4249..092a0fb264c28 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1705,10 +1705,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, // time in isGuaranteedToExecute if we don't actually have anything to // drop. It is a compile time optimization, not required for correctness. 
!SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) { - if (ProfcheckDisableMetadataFixes) - I.dropUBImplyingAttrsAndMetadata(); - else - I.dropUBImplyingAttrsAndMetadata({LLVMContext::MD_prof}); + I.dropUBImplyingAttrsAndMetadata(); } if (isa(I)) diff --git a/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll b/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll index 2c5889a981db2..08397b5755a3f 100644 --- a/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll +++ b/llvm/test/Transforms/SimplifyCFG/PhiBlockMerge.ll @@ -1,20 +1,21 @@ -; NOTE: Assertions have been autogenerated by update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 ; Test merging of blocks that only have PHI nodes in them ; ; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s ; define i32 @test(i1 %a, i1 %b) { -; CHECK-LABEL: @test( -; CHECK: M: -; CHECK-NEXT: [[DOT:%.*]] = select i1 %b, i32 0, i32 1 -; CHECK-NEXT: [[W:%.*]] = select i1 %a, i32 2, i32 [[DOT]] +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: [[M:.*:]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[B]], i32 0, i32 1, !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: [[W:%.*]] = select i1 [[A]], i32 2, i32 [[SPEC_SELECT]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[R:%.*]] = add i32 [[W]], 1 ; CHECK-NEXT: ret i32 [[R]] ; - br i1 %a, label %M, label %O + br i1 %a, label %M, label %O, !prof !0 O: ; preds = %0 - br i1 %b, label %N, label %Q + br i1 %b, label %N, label %Q, !prof !1 Q: ; preds = %O br label %N N: ; preds = %Q, %O @@ -27,3 +28,9 @@ M: ; preds = %N, %0 ret i32 %R } +!0 = !{!"branch_weights", i32 11, i32 7} +!1 = !{!"branch_weights", i32 3, i32 5} +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 3, i32 5} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 11, i32 7} +;. 
From 8f25ea2d73d9a4a64e7ab26e6b1d7a8f73605713 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Fri, 12 Sep 2025 15:05:16 -0700
Subject: [PATCH 187/734] [NFC] Leave a comment in `Local.cpp` about debug info
 & sample profiling (#155296)

Issue #152767
---
 llvm/lib/Transforms/Utils/Local.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 2cfd70a1746c8..57dc1b38b8ec3 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3342,8 +3342,11 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
   // retain their original debug locations (DILocations) and debug intrinsic
   // instructions.
   //
-  // Doing so would degrade the debugging experience and adversely affect the
-  // accuracy of profiling information.
+  // Doing so would degrade the debugging experience.
+  //
+  // FIXME: Issue #152767: debug info should also be the same as the
+  // original branch, **if** the user explicitly indicated that (for sampling
+  // PGO)
   //
   // Currently, when hoisting the instructions, we take the following actions:
   // - Remove their debug intrinsic instructions.

From 0d4a615998a7d5a6ad1f2866e9f3276acfc70fc0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Fri, 12 Sep 2025 15:07:25 -0700
Subject: [PATCH 188/734] [InstCombine] Make test resilient to metadata
 presence (#157607)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modernized it to use `update_test_checks`, which addresses an ambiguity
in the previous test formulation, where profile metadata of value
`i32 1` would have (incorrectly) matched.
--- .../InstCombine/2004-09-20-BadLoadCombine2.ll | 38 ++++++++++++------- llvm/utils/profcheck-xfail.txt | 1 - 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll index f558e35ebe015..1d89dd6195032 100644 --- a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll +++ b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll @@ -1,25 +1,35 @@ -; RUN: opt < %s -passes=instcombine,mem2reg,simplifycfg -simplifycfg-require-and-preserve-domtree=1 | \ -; RUN: llvm-dis | grep -v store | not grep "i32 1" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt %s -passes=instcombine,mem2reg,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -o - | FileCheck %s ; Test to make sure that instcombine does not accidentally propagate the load ; into the PHI, which would break the program. 
define i32 @test(i1 %C) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[X2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 1, ptr [[X]], align 4 +; CHECK-NEXT: store i32 2, ptr [[X2]], align 4 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C]], ptr [[X]], ptr [[X2]] +; CHECK-NEXT: store i32 3, ptr [[X]], align 4 +; CHECK-NEXT: [[TMP_3:%.*]] = load i32, ptr [[SPEC_SELECT]], align 4 +; CHECK-NEXT: ret i32 [[TMP_3]] +; entry: - %X = alloca i32 ; [#uses=3] - %X2 = alloca i32 ; [#uses=2] - store i32 1, ptr %X - store i32 2, ptr %X2 - br i1 %C, label %cond_true.i, label %cond_continue.i + %X = alloca i32 ; [#uses=3] + %X2 = alloca i32 ; [#uses=2] + store i32 1, ptr %X + store i32 2, ptr %X2 + br i1 %C, label %cond_true.i, label %cond_continue.i cond_true.i: ; preds = %entry - br label %cond_continue.i + br label %cond_continue.i cond_continue.i: ; preds = %cond_true.i, %entry - %mem_tmp.i.0 = phi ptr [ %X, %cond_true.i ], [ %X2, %entry ] ; [#uses=1] - store i32 3, ptr %X - %tmp.3 = load i32, ptr %mem_tmp.i.0 ; [#uses=1] - ret i32 %tmp.3 + %mem_tmp.i.0 = phi ptr [ %X, %cond_true.i ], [ %X2, %entry ] ; [#uses=1] + store i32 3, ptr %X + %tmp.3 = load i32, ptr %mem_tmp.i.0 ; [#uses=1] + ret i32 %tmp.3 } - - diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index e1ee7c3664a51..9d170b392b6c7 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -830,7 +830,6 @@ Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll Transforms/IndVarSimplify/pr45835.ll Transforms/IndVarSimplify/preserving-debugloc-rem-div.ll Transforms/Inline/optimization-remarks-hotness-threshold.ll -Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll Transforms/InstCombine/2004-09-20-BadLoadCombine.ll Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll Transforms/InstCombine/2011-02-14-InfLoop.ll From 
8ee31ab00b95fc58110956f8945b0232045e8d86 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Fri, 12 Sep 2025 15:08:23 -0700 Subject: [PATCH 189/734] [WebKit checkers] Treat function pointers with "Singleton" suffix as singleton. (#158012) --- .../Checkers/WebKit/ASTUtils.cpp | 5 ++++ .../Checkers/WebKit/PtrTypesSemantics.cpp | 2 +- .../Checkers/WebKit/PtrTypesSemantics.h | 3 +- .../Checkers/WebKit/unretained-call-args.mm | 28 +++++++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 6f13d552b4b44..b629de3254ed3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -160,6 +160,11 @@ bool tryToFindPtrOrigin( if (Name == "__builtin___CFStringMakeConstantString" || Name == "NSClassFromString") return callback(E, true); + } else if (auto *CalleeE = call->getCallee()) { + if (auto *E = dyn_cast(CalleeE->IgnoreParenCasts())) { + if (isSingleton(E->getFoundDecl())) + return callback(E, true); + } } // Sometimes, canonical type erroneously turns Ref into T. 
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 56747d72136e3..90b2343b4be77 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -479,7 +479,7 @@ bool isTrivialBuiltinFunction(const FunctionDecl *F) { Name.starts_with("os_log") || Name.starts_with("_os_log"); } -bool isSingleton(const FunctionDecl *F) { +bool isSingleton(const NamedDecl *F) { assert(F); // FIXME: check # of params == 1 if (auto *MethodDecl = dyn_cast(F)) { diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index 3c9560cb8059b..d2095d07e1434 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -21,6 +21,7 @@ class CXXMethodDecl; class CXXRecordDecl; class Decl; class FunctionDecl; +class NamedDecl; class QualType; class RecordType; class Stmt; @@ -156,7 +157,7 @@ bool isPtrConversion(const FunctionDecl *F); bool isTrivialBuiltinFunction(const FunctionDecl *F); /// \returns true if \p F is a static singleton function. -bool isSingleton(const FunctionDecl *F); +bool isSingleton(const NamedDecl *F); /// An inter-procedural analysis facility that detects functions with "trivial" /// behavior with respect to reference counting, such as simple field getters. 
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm index f39822ee2a8c6..75eead070fdf9 100644 --- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm +++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm @@ -438,6 +438,34 @@ void use_const_local() { } // namespace const_global +namespace var_decl_ref_singleton { + +static Class initSomeObject() { return nil; } +static Class (*getSomeObjectClassSingleton)() = initSomeObject; + +bool foo(NSString *obj) { + return [obj isKindOfClass:getSomeObjectClassSingleton()]; +} + +class Bar { +public: + Class someObject(); + static Class staticSomeObject(); +}; +typedef Class (Bar::*SomeObjectSingleton)(); + +bool bar(NSObject *obj, Bar *bar, SomeObjectSingleton someObjSingleton) { + return [obj isKindOfClass:(bar->*someObjSingleton)()]; + // expected-warning@-1{{Call argument for parameter 'aClass' is unretained and unsafe}} +} + +bool baz(NSObject *obj) { + Class (*someObjectSingleton)() = Bar::staticSomeObject; + return [obj isKindOfClass:someObjectSingleton()]; +} + +} // namespace var_decl_ref_singleton + namespace ns_retained_return_value { NSString *provideNS() NS_RETURNS_RETAINED; From ba3bce0779fa195867aa804146c2ec24cfaf9976 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 12 Sep 2025 15:25:28 -0700 Subject: [PATCH 190/734] [Github] Switch back to tj-actions/changed-files (#158335) We were using the step security fork after the tj-actions/changed-files supply chain attack given Github disabled the repo and all our actions were failing during that time. Switch away from the fork back to the main repository to avoid an extra level of indirection until we can probably just stop using this action/roll our own. 
---
 .github/workflows/docs.yml           | 2 +-
 .github/workflows/pr-code-format.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index b627803f61b27..8cdd39c164cca 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -60,7 +60,7 @@ jobs:
           fetch-depth: 2
       - name: Get subprojects that have doc changes
         id: docs-changed-subprojects
-        uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
+        uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
         with:
           skip_initial_fetch: true
           base_sha: 'HEAD~1'
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index 9341eaf3ce7c2..9396bf019e1ac 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -25,7 +25,7 @@ jobs:
 
       - name: Get changed files
         id: changed-files
-        uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
+        uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
         with:
           separator: ","
           skip_initial_fetch: true

From 13eecf7f9f42dfded46d8feaa01bc77962d10845 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 12 Sep 2025 15:29:28 -0700
Subject: [PATCH 191/734] [RISCV] Use hasBEXTILike in useInversedSetcc and
 shouldFoldSelectWithSingleBitTest. (#158366)

Add hasVendorXTHeadCondMov to shouldFoldSelectWithSingleBitTest. The
optimizations in these functions are equally applicable to these.

I changed the RUN line for xtheadcondmov in condops.ll to use XTHeadBs
to get coverage of the hasBEXTILike changes. I didn't think it was worth
an additional RUN line and check prefix.
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +++--- llvm/test/CodeGen/RISCV/condops.ll | 19 ++++++++----------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 523b857f9e6cd..c3071ad5cd697 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -18950,7 +18950,7 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate // BEXTI, where C is power of 2. - if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && + if (Subtarget.hasBEXTILike() && VT.isScalarInteger() && (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); @@ -24939,8 +24939,8 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( EVT VT, const APInt &AndMask) const { - if (Subtarget.hasCZEROLike()) - return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024); + if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov()) + return !Subtarget.hasBEXTILike() && AndMask.ugt(1024); return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); } diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll index 4fb3dff88017c..9d95f1f5c9615 100644 --- a/llvm/test/CodeGen/RISCV/condops.ll +++ b/llvm/test/CodeGen/RISCV/condops.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs < %s | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f,+zbs,+xventanacondops < %s | FileCheck %s -check-prefix=RV32XVENTANACONDOPS ; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+xventanacondops < %s | FileCheck %s -check-prefix=RV64XVENTANACONDOPS -; RUN: llc -mtriple=riscv64 -target-abi=lp64f 
-mattr=+f,+zbs,+xtheadcondmov < %s | FileCheck %s -check-prefix=RV64XTHEADCONDMOV +; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+xtheadbs,+xtheadcondmov < %s | FileCheck %s -check-prefix=RV64XTHEADCONDMOV ; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f,+zbs,+zicond < %s | FileCheck %s -check-prefix=RV32ZICOND ; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+zicond < %s | FileCheck %s -check-prefix=RV64ZICOND @@ -126,7 +126,7 @@ define i64 @zero_singlebit1(i64 %rs1, i64 %rs2) { ; ; RV64XTHEADCONDMOV-LABEL: zero_singlebit1: ; RV64XTHEADCONDMOV: # %bb.0: -; RV64XTHEADCONDMOV-NEXT: bexti a1, a1, 12 +; RV64XTHEADCONDMOV-NEXT: th.tst a1, a1, 12 ; RV64XTHEADCONDMOV-NEXT: th.mvnez a0, zero, a1 ; RV64XTHEADCONDMOV-NEXT: ret ; @@ -179,9 +179,8 @@ define i64 @zero_singlebit2(i64 %rs1, i64 %rs2) { ; ; RV64XTHEADCONDMOV-LABEL: zero_singlebit2: ; RV64XTHEADCONDMOV: # %bb.0: -; RV64XTHEADCONDMOV-NEXT: slli a1, a1, 51 -; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63 -; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0 +; RV64XTHEADCONDMOV-NEXT: th.tst a1, a1, 12 +; RV64XTHEADCONDMOV-NEXT: th.mveqz a0, zero, a1 ; RV64XTHEADCONDMOV-NEXT: ret ; ; RV32ZICOND-LABEL: zero_singlebit2: @@ -4297,9 +4296,8 @@ define i64 @single_bit(i64 %x) { ; ; RV64XTHEADCONDMOV-LABEL: single_bit: ; RV64XTHEADCONDMOV: # %bb.0: # %entry -; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 53 -; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63 -; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0 +; RV64XTHEADCONDMOV-NEXT: andi a1, a0, 1024 +; RV64XTHEADCONDMOV-NEXT: th.mveqz a0, zero, a1 ; RV64XTHEADCONDMOV-NEXT: ret ; ; RV32ZICOND-LABEL: single_bit: @@ -4353,9 +4351,8 @@ define i64 @single_bit2(i64 %x) { ; ; RV64XTHEADCONDMOV-LABEL: single_bit2: ; RV64XTHEADCONDMOV: # %bb.0: # %entry -; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 52 -; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63 -; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0 +; RV64XTHEADCONDMOV-NEXT: th.tst a1, a0, 11 +; RV64XTHEADCONDMOV-NEXT: th.mveqz a0, zero, a1 ; 
RV64XTHEADCONDMOV-NEXT: ret ; ; RV32ZICOND-LABEL: single_bit2: From 1131e44ed3f5fadb2d22ff155d4e47f69757d02f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 12 Sep 2025 15:29:44 -0700 Subject: [PATCH 192/734] [RISCV] Use hasCPOPLike in isCtpopFast and getPopcntSupport (#158371) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +- llvm/test/CodeGen/RISCV/xcvbitmanip.ll | 47 +++++++++++++++++++ 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c3071ad5cd697..f9b484b98739f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -24844,8 +24844,7 @@ bool RISCVTargetLowering::isCtpopFast(EVT VT) const { return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) return true; - // FIXME: Should use hasCPOPLike here. - return Subtarget.hasStdExtZbb() && + return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 1ca513214f67c..a06faa414a2ef 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -289,9 +289,7 @@ bool RISCVTTIImpl::hasActiveVectorLength() const { TargetTransformInfo::PopcntSupportKind RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit()) - ? TTI::PSK_FastHardware - : TTI::PSK_Software; + return ST->hasCPOPLike() ? 
TTI::PSK_FastHardware : TTI::PSK_Software; } InstructionCost RISCVTTIImpl::getPartialReductionCost( diff --git a/llvm/test/CodeGen/RISCV/xcvbitmanip.ll b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll index d25ff28475c4b..b2cebabb7df8b 100644 --- a/llvm/test/CodeGen/RISCV/xcvbitmanip.ll +++ b/llvm/test/CodeGen/RISCV/xcvbitmanip.ll @@ -229,3 +229,50 @@ define i32 @test.llvm.bitrev(i32 %a) { %1 = call i32 @llvm.bitreverse(i32 %a) ret i32 %1 } + +define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind { +; CHECK-LABEL: ctpop_i32_ult_two: +; CHECK: # %bb.0: +; CHECK-NEXT: cv.cnt a0, a0 +; CHECK-NEXT: sltiu a0, a0, 2 +; CHECK-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ult i32 %1, 2 + ret i1 %2 +} + +define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind { +; CHECK-LABEL: ctpop_i32_ugt_one: +; CHECK: # %bb.0: +; CHECK-NEXT: cv.cnt a0, a0 +; CHECK-NEXT: sltiu a0, a0, 2 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ugt i32 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind { +; CHECK-LABEL: ctpop_i32_eq_one: +; CHECK: # %bb.0: +; CHECK-NEXT: cv.cnt a0, a0 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp eq i32 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind { +; CHECK-LABEL: ctpop_i32_ne_one: +; CHECK: # %bb.0: +; CHECK-NEXT: cv.cnt a0, a0 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ne i32 %1, 1 + ret i1 %2 +} From 52c583b3f95a0e666ab837e39a5db900b66adf15 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Fri, 12 Sep 2025 15:58:16 -0700 Subject: [PATCH 193/734] [SampleFDO][TypeProf]Support vtable type profiling for ext-binary and text format (#148002) This change extends SampleFDO ext-binary and text format to record the vtable symbols and their counts for virtual calls inside a function. 
The vtable profiles will allow the compiler to annotate vtable types on IR instructions and perform vtable-based indirect call promotion. An RFC is in https://discourse.llvm.org/t/rfc-vtable-type-profiling-for-samplefdo/87283 Given a function below, the before vs after of a function's profile is illustrated in text format in the table: ``` __attribute__((noinline)) int loop_func(int i, int a, int b) { Base *ptr = createType(i); int sum = ptr->func(a, b); delete ptr; return sum; } ``` | before | after | | --- | --- | | Samples collected in the function's body {
0: 636241
1: 681458, calls: _Z10createTypei:681458
3: 543499, calls: _ZN12_GLOBAL__N_18Derived24funcEii:410621 _ZN8Derived14funcEii:132878
5.1: 602201, calls: _ZN12_GLOBAL__N_18Derived2D0Ev:454635 _ZN8Derived1D0Ev:147566
7: 511057
} | Samples collected in the function's body {
0: 636241
1: 681458, calls: _Z10createTypei:681458
3: 543499, calls: _ZN12_GLOBAL__N_18Derived24funcEii:410621 _ZN8Derived14funcEii:132878
3: vtables: _ZTV8Derived1:1377 _ZTVN12_GLOBAL__N_18Derived2E:4250
5.1: 602201, calls: _ZN12_GLOBAL__N_18Derived2D0Ev:454635 _ZN8Derived1D0Ev:147566
5.1: vtables: _ZTV8Derived1:227 _ZTVN12_GLOBAL__N_18Derived2E:765
7: 511057
} | Key points for this change: 1. In-memory representation of vtable profiles * A field of type `map<LineLocation, TypeCountMap>` is introduced in a function's in-memory representation [FunctionSamples](https://github.com/llvm/llvm-project/blob/ccc416312ed72e92a885425d9cb9c01f9afa58eb/llvm/include/llvm/ProfileData/SampleProf.h#L749-L754). 2. The vtable counters for one LineLocation represent the relative frequency among vtables for this LineLocation. They are not required to be comparable across LineLocations. 3. For backward compatibility of ext-binary format, we take one bit from ProfSummaryFlag as illustrated in the enum class `SecProfSummaryFlags`. The ext-binary profile reader parses the integer type flag and reads this bit. If it's set, the profile reader will parse vtable profiles. 4. The vtable profiles are optional in ext-binary format, and not serialized out by default; we introduce an LLVM boolean option (named `-extbinary-write-vtable-type-prof`). The ext-binary profile writer reads the boolean option and decides whether to set the section flag bit and serialize the in-memory class members corresponding to vtables. 5. This change doesn't implement `llvm-profdata overlap --sample` for the vtable profiles. A subsequent change will do it to keep this one focused on the profile format change. We don't plan to add the vtable support to non-extensible format mainly because of the maintenance cost to keep backward compatibility for prior versions of profile data. * Currently, the [non-extensible binary format](https://github.com/llvm/llvm-project/blob/5c28af409978c19a35021855a29dcaa65e95da00/llvm/lib/ProfileData/SampleProfWriter.cpp#L899-L900) does not have feature parity with extensible binary format today, for instance, the former doesn't support [profile symbol list](https://github.com/llvm/llvm-project/blob/41e22aa31b1905aa3e9d83c0343a96ec0d5187ec/llvm/include/llvm/ProfileData/SampleProf.h#L1518-L1522) or context-sensitive PGO, both of which give measurable performance boost.
Presumably the non-extensible format is not in wide use. --------- Co-authored-by: Paschalis Mpeis --- llvm/include/llvm/ProfileData/SampleProf.h | 101 ++++++++++++++- .../llvm/ProfileData/SampleProfReader.h | 12 ++ .../llvm/ProfileData/SampleProfWriter.h | 14 +- llvm/lib/ProfileData/SampleProf.cpp | 40 ++++++ llvm/lib/ProfileData/SampleProfReader.cpp | 120 +++++++++++++++++- llvm/lib/ProfileData/SampleProfWriter.cpp | 80 +++++++++++- .../Inputs/profile-symbol-list-ext.expected | 44 +++++++ .../Inputs/sample-profile-ext.proftext | 18 +++ .../profile-symbol-list-compress.test | 9 ++ .../llvm-profdata/profile-symbol-list.test | 9 ++ llvm/test/tools/llvm-profdata/roundtrip.test | 6 + 11 files changed, 436 insertions(+), 17 deletions(-) create mode 100644 llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index a626071d23915..c0e5d2d79cea2 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -62,7 +62,7 @@ enum class sampleprof_error { uncompress_failed, zlib_unavailable, hash_mismatch, - illegal_line_offset + illegal_line_offset, }; inline std::error_code make_error_code(sampleprof_error E) { @@ -91,6 +91,8 @@ struct is_error_code_enum : std::true_type {}; namespace llvm { namespace sampleprof { +constexpr char kVTableProfPrefix[] = "vtables "; + enum SampleProfileFormat { SPF_None = 0, SPF_Text = 0x1, @@ -204,6 +206,9 @@ enum class SecProfSummaryFlags : uint32_t { /// SecFlagIsPreInlined means this profile contains ShouldBeInlined /// contexts thus this is CS preinliner computed. SecFlagIsPreInlined = (1 << 4), + + /// SecFlagHasVTableTypeProf means this profile contains vtable type profiles. 
+ SecFlagHasVTableTypeProf = (1 << 5), }; enum class SecFuncMetadataFlags : uint32_t { @@ -303,7 +308,7 @@ struct LineLocation { } uint64_t getHashCode() const { - return ((uint64_t) Discriminator << 32) | LineOffset; + return ((uint64_t)Discriminator << 32) | LineOffset; } uint32_t LineOffset; @@ -318,16 +323,30 @@ struct LineLocationHash { LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); +/// Key represents type of a C++ polymorphic class type by its vtable and value +/// represents its counter. +/// TODO: The class name FunctionId should be renamed to SymbolId in a refactor +/// change. +using TypeCountMap = std::map; + +/// Write \p Map to the output stream. Keys are linearized using \p NameTable +/// and written as ULEB128. Values are written as ULEB128 as well. +std::error_code +serializeTypeMap(const TypeCountMap &Map, + const MapVector &NameTable, + raw_ostream &OS); + /// Representation of a single sample record. /// /// A sample record is represented by a positive integer value, which /// indicates how frequently was the associated line location executed. /// /// Additionally, if the associated location contains a function call, -/// the record will hold a list of all the possible called targets. For -/// direct calls, this will be the exact function being invoked. For -/// indirect calls (function pointers, virtual table dispatch), this -/// will be a list of one or more functions. +/// the record will hold a list of all the possible called targets and the types +/// for virtual table dispatches. For direct calls, this will be the exact +/// function being invoked. For indirect calls (function pointers, virtual table +/// dispatch), this will be a list of one or more functions. For virtual table +/// dispatches, this record will also hold the type of the object. 
class SampleRecord { public: using CallTarget = std::pair; @@ -746,6 +765,7 @@ using BodySampleMap = std::map; // memory, which is *very* significant for large profiles. using FunctionSamplesMap = std::map; using CallsiteSampleMap = std::map; +using CallsiteTypeMap = std::map; using LocToLocMap = std::unordered_map; @@ -939,6 +959,14 @@ class FunctionSamples { return &Iter->second; } + /// Returns the TypeCountMap for inlined callsites at the given \p Loc. + const TypeCountMap *findCallsiteTypeSamplesAt(const LineLocation &Loc) const { + auto Iter = VirtualCallsiteTypeCounts.find(mapIRLocToProfileLoc(Loc)); + if (Iter == VirtualCallsiteTypeCounts.end()) + return nullptr; + return &Iter->second; + } + /// Returns a pointer to FunctionSamples at the given callsite location /// \p Loc with callee \p CalleeName. If no callsite can be found, relax /// the restriction to return the FunctionSamples at callsite location @@ -1000,6 +1028,46 @@ class FunctionSamples { return CallsiteSamples; } + /// Returns vtable access samples for the C++ types collected in this + /// function. + const CallsiteTypeMap &getCallsiteTypeCounts() const { + return VirtualCallsiteTypeCounts; + } + + /// Returns the vtable access samples for the C++ types for \p Loc. + /// Under the hood, the caller-specified \p Loc will be un-drifted before the + /// type sample lookup if possible. + TypeCountMap &getTypeSamplesAt(const LineLocation &Loc) { + return VirtualCallsiteTypeCounts[mapIRLocToProfileLoc(Loc)]; + } + + /// Scale \p Other sample counts by \p Weight and add the scaled result to the + /// type samples for \p Loc. Under the hoold, the caller-provided \p Loc will + /// be un-drifted before the type sample lookup if possible. + /// typename T is either a std::map or a DenseMap. 
+ template + sampleprof_error addCallsiteVTableTypeProfAt(const LineLocation &Loc, + const T &Other, + uint64_t Weight = 1) { + static_assert((std::is_same_v || + std::is_same_v) && + std::is_same_v, + "T must be a map with StringRef or FunctionId as key and " + "uint64_t as value"); + TypeCountMap &TypeCounts = getTypeSamplesAt(Loc); + bool Overflowed = false; + + for (const auto [Type, Count] : Other) { + FunctionId TypeId(Type); + bool RowOverflow = false; + TypeCounts[TypeId] = SaturatingMultiplyAdd( + Count, Weight, TypeCounts[TypeId], &RowOverflow); + Overflowed |= RowOverflow; + } + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + /// Return the maximum of sample counts in a function body. When SkipCallSite /// is false, which is the default, the return count includes samples in the /// inlined functions. When SkipCallSite is true, the return count only @@ -1054,6 +1122,10 @@ class FunctionSamples { mergeSampleProfErrors(Result, FSMap[Rec.first].merge(Rec.second, Weight)); } + for (const auto &[Loc, OtherTypeMap] : Other.getCallsiteTypeCounts()) + mergeSampleProfErrors( + Result, addCallsiteVTableTypeProfAt(Loc, OtherTypeMap, Weight)); + return Result; } @@ -1297,6 +1369,23 @@ class FunctionSamples { /// collected in the call to baz() at line offset 8. CallsiteSampleMap CallsiteSamples; + /// Map a virtual callsite to the list of accessed vtables and vtable counts. + /// The callsite is referenced by its source location. + /// + /// For example, given: + /// + /// void foo() { + /// ... + /// 5 inlined_vcall_bar(); + /// ... + /// 5 inlined_vcall_baz(); + /// ... + /// 200 inlined_vcall_qux(); + /// } + /// This map will contain two entries. One with two types for line offset 5 + /// and one with one type for line offset 200. + CallsiteTypeMap VirtualCallsiteTypeCounts; + /// IR to profile location map generated by stale profile matching. 
/// /// Each entry is a mapping from the location on current build to the matched diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index bfe079fbe536f..799938ab901c1 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -589,6 +589,10 @@ class SampleProfileReader { /// Whether the function profiles use FS discriminators. bool ProfileIsFS = false; + /// If true, the profile has vtable profiles and reader should decode them + /// to parse profiles correctly. + bool ReadVTableProf = false; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; @@ -703,6 +707,14 @@ class LLVM_ABI SampleProfileReaderBinary : public SampleProfileReader { /// otherwise same as readStringFromTable, also return its hash value. ErrorOr> readSampleContextFromTable(); + /// Read all virtual functions' vtable access counts for \p FProfile. + std::error_code readCallsiteVTableProf(FunctionSamples &FProfile); + + /// Read bytes from the input buffer pointed by `Data` and decode them into + /// \p M. `Data` will be advanced to the end of the read bytes when this + /// function returns. Returns error if any. + std::error_code readVTableTypeCountMap(TypeCountMap &M); + /// Points to the current location in the buffer. 
const uint8_t *Data = nullptr; diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index e84b2095efd7b..9dbeaf56509b0 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -217,13 +217,20 @@ class LLVM_ABI SampleProfileWriterBinary : public SampleProfileWriter { std::error_code writeBody(const FunctionSamples &S); inline void stablizeNameTable(MapVector &NameTable, std::set &V); - + MapVector NameTable; - + void addName(FunctionId FName); virtual void addContext(const SampleContext &Context); void addNames(const FunctionSamples &S); + /// Write \p CallsiteTypeMap to the output stream \p OS. + std::error_code + writeCallsiteVTableProf(const CallsiteTypeMap &CallsiteTypeMap, + raw_ostream &OS); + + bool WriteVTableProf = false; + private: LLVM_ABI friend ErrorOr> SampleProfileWriter::create(std::unique_ptr &OS, @@ -412,8 +419,7 @@ class LLVM_ABI SampleProfileWriterExtBinaryBase class LLVM_ABI SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { public: - SampleProfileWriterExtBinary(std::unique_ptr &OS) - : SampleProfileWriterExtBinaryBase(OS) {} + SampleProfileWriterExtBinary(std::unique_ptr &OS); private: std::error_code writeDefaultLayout(const SampleProfileMap &ProfileMap); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index 60c1393616713..ac7513ef2cb49 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -47,6 +47,24 @@ bool FunctionSamples::ProfileIsPreInlined = false; bool FunctionSamples::UseMD5 = false; bool FunctionSamples::HasUniqSuffix = true; bool FunctionSamples::ProfileIsFS = false; + +std::error_code +serializeTypeMap(const TypeCountMap &Map, + const MapVector &NameTable, + raw_ostream &OS) { + encodeULEB128(Map.size(), OS); + for (const auto &[TypeName, SampleCount] : Map) { + if (auto NameIndexIter = 
NameTable.find(TypeName); + NameIndexIter != NameTable.end()) { + encodeULEB128(NameIndexIter->second, OS); + } else { + // If the type is not in the name table, we cannot serialize it. + return sampleprof_error::truncated_name_table; + } + encodeULEB128(SampleCount, OS); + } + return sampleprof_error::success; +} } // namespace sampleprof } // namespace llvm @@ -178,6 +196,17 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, return OS; } +static void printTypeCountMap(raw_ostream &OS, LineLocation Loc, + const TypeCountMap &TypeCountMap) { + if (TypeCountMap.empty()) { + return; + } + OS << Loc << ": vtables: "; + for (const auto &[Type, Count] : TypeCountMap) + OS << Type << ":" << Count << " "; + OS << "\n"; +} + /// Print the samples collected for a function on stream \p OS. void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { if (getFunctionHash()) @@ -192,7 +221,13 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { SampleSorter SortedBodySamples(BodySamples); for (const auto &SI : SortedBodySamples.get()) { OS.indent(Indent + 2); + const auto &Loc = SI->first; OS << SI->first << ": " << SI->second; + if (const TypeCountMap *TypeCountMap = + this->findCallsiteTypeSamplesAt(Loc)) { + OS.indent(Indent + 2); + printTypeCountMap(OS, Loc, *TypeCountMap); + } } OS.indent(Indent); OS << "}\n"; @@ -214,6 +249,11 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { OS << Loc << ": inlined callee: " << FuncSample.getFunction() << ": "; FuncSample.print(OS, Indent + 4); } + auto TypeSamplesIter = VirtualCallsiteTypeCounts.find(Loc); + if (TypeSamplesIter != VirtualCallsiteTypeCounts.end()) { + OS.indent(Indent + 2); + printTypeCountMap(OS, Loc, TypeSamplesIter->second); + } } OS.indent(Indent); OS << "}\n"; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 12769a391286c..81ae792e70b99 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ 
b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -197,8 +197,37 @@ enum class LineType { CallSiteProfile, BodyProfile, Metadata, + VirtualCallTypeProfile, }; +// Parse `Input` as a white-space separated list of `vtable:count` pairs. An +// example input line is `_ZTVbar:1471 _ZTVfoo:630`. +static bool parseTypeCountMap(StringRef Input, + DenseMap &TypeCountMap) { + for (size_t Index = Input.find_first_not_of(' '); Index != StringRef::npos;) { + size_t ColonIndex = Input.find(':', Index); + if (ColonIndex == StringRef::npos) + return false; // No colon found, invalid format. + StringRef TypeName = Input.substr(Index, ColonIndex - Index); + // CountIndex is the start index of count. + size_t CountStartIndex = ColonIndex + 1; + // NextIndex is the start index after the 'target:count' pair. + size_t NextIndex = Input.find_first_of(' ', CountStartIndex); + uint64_t Count; + if (Input.substr(CountStartIndex, NextIndex - CountStartIndex) + .getAsInteger(10, Count)) + return false; // Invalid count. + // Error on duplicated type names in one line of input. + auto [Iter, Inserted] = TypeCountMap.insert({TypeName, Count}); + if (!Inserted) + return false; + Index = (NextIndex == StringRef::npos) + ? StringRef::npos + : Input.find_first_not_of(' ', NextIndex); + } + return true; +} + /// Parse \p Input as line sample. /// /// \param Input input line. @@ -215,6 +244,7 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, uint64_t &NumSamples, uint32_t &LineOffset, uint32_t &Discriminator, StringRef &CalleeName, DenseMap &TargetCountMap, + DenseMap &TypeCountMap, uint64_t &FunctionHash, uint32_t &Attributes, bool &IsFlat) { for (Depth = 0; Input[Depth] == ' '; Depth++) @@ -306,6 +336,10 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, // Change n3 to the next blank space after colon + integer pair. 
n3 = n4; } + } else if (Rest.starts_with(kVTableProfPrefix)) { + LineTy = LineType::VirtualCallTypeProfile; + return parseTypeCountMap(Rest.substr(strlen(kVTableProfPrefix)), + TypeCountMap); } else { LineTy = LineType::CallSiteProfile; size_t n3 = Rest.find_last_of(':'); @@ -374,19 +408,27 @@ std::error_code SampleProfileReaderText::readImpl() { uint64_t NumSamples; StringRef FName; DenseMap TargetCountMap; + DenseMap TypeCountMap; uint32_t Depth, LineOffset, Discriminator; LineType LineTy = LineType::BodyProfile; uint64_t FunctionHash = 0; uint32_t Attributes = 0; bool IsFlat = false; + // TODO: Update ParseLine to return an error code instead of a bool and + // report it. if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, - Discriminator, FName, TargetCountMap, FunctionHash, - Attributes, IsFlat)) { + Discriminator, FName, TargetCountMap, TypeCountMap, + FunctionHash, Attributes, IsFlat)) { switch (LineTy) { case LineType::Metadata: reportError(LineIt.line_number(), "Cannot parse metadata: " + *LineIt); break; + case LineType::VirtualCallTypeProfile: + reportError(LineIt.line_number(), + "Expected 'vtables [mangled_vtable:NUM]+', found " + + *LineIt); + break; default: reportError(LineIt.line_number(), "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + @@ -417,6 +459,14 @@ std::error_code SampleProfileReaderText::readImpl() { DepthMetadata = 0; break; } + + case LineType::VirtualCallTypeProfile: { + mergeSampleProfErrors( + Result, InlineStack.back()->addCallsiteVTableTypeProfAt( + LineLocation(LineOffset, Discriminator), TypeCountMap)); + break; + } + case LineType::BodyProfile: { FunctionSamples &FProfile = *InlineStack.back(); for (const auto &name_count : TargetCountMap) { @@ -598,6 +648,67 @@ SampleProfileReaderBinary::readSampleContextFromTable() { return std::make_pair(Context, Hash); } +std::error_code +SampleProfileReaderBinary::readVTableTypeCountMap(TypeCountMap &M) { + auto NumVTableTypes = readNumber(); + if (std::error_code EC = 
NumVTableTypes.getError()) + return EC; + + for (uint32_t I = 0; I < *NumVTableTypes; ++I) { + auto VTableType(readStringFromTable()); + if (std::error_code EC = VTableType.getError()) + return EC; + + auto VTableSamples = readNumber(); + if (std::error_code EC = VTableSamples.getError()) + return EC; + // The source profile should not have duplicate vtable records at the same + // location. In case duplicate vtables are found, reader can emit a warning + // but continue processing the profile. + if (!M.insert(std::make_pair(*VTableType, *VTableSamples)).second) { + Ctx.diagnose(DiagnosticInfoSampleProfile( + Buffer->getBufferIdentifier(), 0, + "Duplicate vtable type " + VTableType->str() + + " at the same location. Additional counters will be ignored.", + DS_Warning)); + continue; + } + } + return sampleprof_error::success; +} + +std::error_code +SampleProfileReaderBinary::readCallsiteVTableProf(FunctionSamples &FProfile) { + assert(ReadVTableProf && + "Cannot read vtable profiles if ReadVTableProf is false"); + + // Read the vtable type profile for the callsite. 
+ auto NumCallsites = readNumber(); + if (std::error_code EC = NumCallsites.getError()) + return EC; + + for (uint32_t I = 0; I < *NumCallsites; ++I) { + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + if (!isOffsetLegal(*LineOffset)) + return sampleprof_error::illegal_line_offset; + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + // Here we handle FS discriminators: + const uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); + + if (std::error_code EC = readVTableTypeCountMap(FProfile.getTypeSamplesAt( + LineLocation(*LineOffset, DiscriminatorVal)))) + return EC; + } + return sampleprof_error::success; +} + std::error_code SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { auto NumSamples = readNumber(); @@ -678,6 +789,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { return EC; } + if (ReadVTableProf) + return readCallsiteVTableProf(FProfile); + return sampleprof_error::success; } @@ -740,6 +854,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) FunctionSamples::ProfileIsFS = ProfileIsFS = true; + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagHasVTableTypeProf)) + ReadVTableProf = true; break; case SecNameTable: { bool FixedLengthMD5 = diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 9173a0f94f69d..e5f31348578b8 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -41,6 +41,11 @@ using namespace llvm; using namespace sampleprof; +// To begin with, make this option off by default. 
+static cl::opt ExtBinaryWriteVTableTypeProf( + "extbinary-write-vtable-type-prof", cl::init(false), cl::Hidden, + cl::desc("Write vtable type profile in ext-binary sample profile writer")); + namespace llvm { namespace support { namespace endian { @@ -435,6 +440,9 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined); if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); + if (Type == SecProfSummary && ExtBinaryWriteVTableTypeProf) + addSectionFlag(SecProfSummary, + SecProfSummaryFlags::SecFlagHasVTableTypeProf); uint64_t SectionStart = markSectionStart(Type, LayoutIdx); switch (Type) { @@ -478,6 +486,12 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( return sampleprof_error::success; } +SampleProfileWriterExtBinary::SampleProfileWriterExtBinary( + std::unique_ptr &OS) + : SampleProfileWriterExtBinaryBase(OS) { + WriteVTableProf = ExtBinaryWriteVTableTypeProf; +} + std::error_code SampleProfileWriterExtBinary::writeDefaultLayout( const SampleProfileMap &ProfileMap) { // The const indices passed to writeOneSection below are specifying the @@ -587,6 +601,19 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { OS << " " << J.first << ":" << J.second; OS << "\n"; LineCount++; + + if (const TypeCountMap *Map = S.findCallsiteTypeSamplesAt(Loc); + Map && !Map->empty()) { + OS.indent(Indent + 1); + Loc.print(OS); + OS << ": "; + OS << kVTableProfPrefix; + for (const auto [TypeName, Count] : *Map) { + OS << TypeName << ":" << Count << " "; + } + OS << "\n"; + LineCount++; + } } SampleSorter SortedCallsiteSamples( @@ -603,7 +630,21 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { if (std::error_code EC = writeSample(CalleeSamples)) return EC; } + + if (const TypeCountMap *Map = S.findCallsiteTypeSamplesAt(Loc); + Map 
&& !Map->empty()) { + OS.indent(Indent); + Loc.print(OS); + OS << ": "; + OS << kVTableProfPrefix; + for (const auto [TypeId, Count] : *Map) { + OS << TypeId << ":" << Count << " "; + } + OS << "\n"; + LineCount++; + } } + Indent -= 1; if (FunctionSamples::ProfileIsProbeBased) { @@ -663,6 +704,17 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { addName(CalleeSamples.getFunction()); addNames(CalleeSamples); } + + if (!WriteVTableProf) + return; + // Add all the vtable names to NameTable. + for (const auto &VTableAccessCountMap : + llvm::make_second_range(S.getCallsiteTypeCounts())) { + // Add type name to NameTable. + for (const auto Type : llvm::make_first_range(VTableAccessCountMap)) { + addName(Type); + } + } } void SampleProfileWriterExtBinaryBase::addContext( @@ -801,6 +853,22 @@ std::error_code SampleProfileWriterExtBinaryBase::writeHeader( return sampleprof_error::success; } +std::error_code SampleProfileWriterBinary::writeCallsiteVTableProf( + const CallsiteTypeMap &CallsiteTypeMap, raw_ostream &OS) { + assert(WriteVTableProf && + "writeCallsiteVTableProf should not be called if WriteVTableProf is " + "false"); + + encodeULEB128(CallsiteTypeMap.size(), OS); + for (const auto &[Loc, TypeMap] : CallsiteTypeMap) { + Loc.serialize(OS); + if (std::error_code EC = serializeTypeMap(TypeMap, getNameTable(), OS)) + return EC; + } + + return sampleprof_error::success; +} + std::error_code SampleProfileWriterBinary::writeSummary() { auto &OS = *OutputStream; encodeULEB128(Summary->getTotalCount(), OS); @@ -838,14 +906,16 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { for (const auto &J : S.getCallsiteSamples()) NumCallsites += J.second.size(); encodeULEB128(NumCallsites, OS); - for (const auto &[Loc, CalleeFunctionSampleMap] : S.getCallsiteSamples()) - for (const auto &FunctionSample : - llvm::make_second_range(CalleeFunctionSampleMap)) { - Loc.serialize(OS); - if (std::error_code EC = 
writeBody(FunctionSample)) + for (const auto &J : S.getCallsiteSamples()) + for (const auto &FS : J.second) { + J.first.serialize(OS); + if (std::error_code EC = writeBody(FS.second)) return EC; } + if (WriteVTableProf) + return writeCallsiteVTableProf(S.getCallsiteTypeCounts(), OS); + return sampleprof_error::success; } diff --git a/llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected b/llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected new file mode 100644 index 0000000000000..f7e7499a2c781 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/profile-symbol-list-ext.expected @@ -0,0 +1,44 @@ +Function: main: 368038, 0, 7 sampled lines +Samples collected in the function's body { + 4: 1068 + 4.2: 1068 + 5: 2150 + 5.1: 2150 + 6: 4160 + 7: 1068 + 9: 4128, calls: _Z3bari:2942 _Z3fooi:1262 + 9: vtables: _ZTVbar:2942 _ZTVfoo:1260 +} +Samples collected in inlined callsites { + 10: inlined callee: inline1: 2000, 0, 1 sampled lines + Samples collected in the function's body { + 1: 2000 + } + No inlined callsites in this function + 10: inlined callee: inline2: 4000, 0, 1 sampled lines + Samples collected in the function's body { + 1: 4000 + } + No inlined callsites in this function + 10: vtables: _ZTVinline1:2000 _ZTVinline2:4000 +} +Function: _Z3bari: 40602, 2874, 1 sampled lines +Samples collected in the function's body { + 1: 2874 +} +No inlined callsites in this function +Function: _Z3fooi: 15422, 1220, 1 sampled lines +Samples collected in the function's body { + 1: 1220 +} +No inlined callsites in this function +======== Dump profile symbol list ======== +_Z3goov +_Z3sumii +__libc_csu_fini +__libc_csu_init +_dl_relocate_static_pie +_fini +_init +_start +main diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext new file mode 100644 index 0000000000000..100133fa17ccb --- /dev/null +++ 
b/llvm/test/tools/llvm-profdata/Inputs/sample-profile-ext.proftext @@ -0,0 +1,18 @@ +main:184019:0 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 + 9: 2064 _Z3bari:1471 _Z3fooi:631 + 9: vtables _ZTVbar:1471 _ZTVfoo:630 + 10: inline1:1000 + 1: 1000 + 10: inline2:2000 + 1: 2000 + 10: vtables _ZTVinline1:1000 _ZTVinline2:2000 +_Z3bari:20301:1437 + 1: 1437 +_Z3fooi:7711:610 + 1: 610 diff --git a/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test b/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test index b445695c8e8e4..8383bcc1a2fbe 100644 --- a/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test +++ b/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test @@ -4,3 +4,12 @@ REQUIRES: zlib ; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections %t.1.output %t.2.output -o %t.3.output ; RUN: llvm-profdata show -sample -show-prof-sym-list %t.3.output > %t.4.output ; RUN: diff -b %S/Inputs/profile-symbol-list.expected %t.4.output + +;; Generate two SampleFDO binary profiles and merge them. +;; Tests that the vtable counters in the merged profile are the aggregated +;; result from both sources. 
+; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-1.text %S/Inputs/sample-profile-ext.proftext -o %t.1.output +; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-2.text %S/Inputs/sample-profile-ext.proftext -o %t.2.output +; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -extbinary-write-vtable-type-prof %t.1.output %t.2.output -o %t.3.output +; RUN: llvm-profdata show -sample -show-prof-sym-list %t.3.output > %t.4.output +; RUN: diff -b %S/Inputs/profile-symbol-list-ext.expected %t.4.output diff --git a/llvm/test/tools/llvm-profdata/profile-symbol-list.test b/llvm/test/tools/llvm-profdata/profile-symbol-list.test index 39dcd11ec1db7..6845531066c76 100644 --- a/llvm/test/tools/llvm-profdata/profile-symbol-list.test +++ b/llvm/test/tools/llvm-profdata/profile-symbol-list.test @@ -7,3 +7,12 @@ ; RUN: llvm-profdata show -sample -show-sec-info-only %t.5.output | FileCheck %s -check-prefix=NOSYMLIST ; NOSYMLIST: ProfileSymbolListSection {{.*}} Size: 0 + +;; Generate two SampleFDO binary profiles and merge them. +;; Tests that the vtable counters in the merged profile are the aggregated +;; result from both sources. 
+; RUN: llvm-profdata merge -sample -extbinary -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-1.text %S/Inputs/sample-profile-ext.proftext -o %t.1.output +; RUN: llvm-profdata merge -sample -extbinary -extbinary-write-vtable-type-prof -prof-sym-list=%S/Inputs/profile-symbol-list-2.text %S/Inputs/sample-profile-ext.proftext -o %t.2.output +; RUN: llvm-profdata merge -sample -extbinary -extbinary-write-vtable-type-prof %t.1.output %t.2.output -o %t.3.output +; RUN: llvm-profdata show -sample -show-prof-sym-list %t.3.output > %t.4.output +; RUN: diff -b %S/Inputs/profile-symbol-list-ext.expected %t.4.output diff --git a/llvm/test/tools/llvm-profdata/roundtrip.test b/llvm/test/tools/llvm-profdata/roundtrip.test index 7af76e0a58224..eb55534763877 100644 --- a/llvm/test/tools/llvm-profdata/roundtrip.test +++ b/llvm/test/tools/llvm-profdata/roundtrip.test @@ -16,3 +16,9 @@ RUN: llvm-profdata merge --sample --binary -output=%t.4.profdata %S/Inputs/sampl RUN: llvm-profdata merge --sample --extbinary -output=%t.5.profdata %t.4.profdata RUN: llvm-profdata merge --sample --text -output=%t.4.proftext %t.5.profdata RUN: diff -b %t.4.proftext %S/Inputs/sample-profile.proftext +# Round trip from text --> extbinary --> text. +# The vtable profile is supported by ext-binary profile but not raw binary profile format, +# so we don't use raw binary profile format in this roundtrip. +RUN: llvm-profdata merge --sample --extbinary -extbinary-write-vtable-type-prof --output=%t.5.profdata %S/Inputs/sample-profile-ext.proftext +RUN: llvm-profdata merge --sample --text --output=%t.5.proftext %t.5.profdata +RUN: diff -b %t.5.proftext %S/Inputs/sample-profile-ext.proftext From f32874f77b5a6065a705ffc35b48bff1545cd6cd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 12 Sep 2025 16:09:39 -0700 Subject: [PATCH 194/734] [LegalizeIntegerTypes] Use getShiftAmountConstant. 
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9e85f08abb766..87570e6f44a6f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -5254,9 +5254,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue MulLo, MulHi; TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, N->getOperand(0), N->getOperand(1), MulLo, MulHi); - SDValue SRA = - DAG.getNode(ISD::SRA, dl, VT, MulLo, - DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT)); + SDValue SRA = DAG.getNode( + ISD::SRA, dl, VT, MulLo, + DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl)); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE); SplitInteger(MulLo, Lo, Hi); From bac9e463b1f77b7354fe68c87d58be67e3294806 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 12 Sep 2025 16:15:31 -0700 Subject: [PATCH 195/734] [NFC][CodeGen][CFI] Extract CreateMetadataIdentifierForFnType (#158189) For #158193 --- clang/lib/CodeGen/CGExpr.cpp | 7 ++----- clang/lib/CodeGen/CodeGenModule.cpp | 7 +++++++ clang/lib/CodeGen/CodeGenModule.h | 3 +++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index e8456a44f8367..e6e4947882544 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6496,11 +6496,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, SanitizerDebugLocation SanScope(this, {CheckOrdinal}, CheckHandler); EmitSanitizerStatReport(llvm::SanStat_CFI_ICall); - llvm::Metadata *MD; - if (CGM.getCodeGenOpts().SanitizeCfiICallGeneralizePointers) - MD = CGM.CreateMetadataIdentifierGeneralized(QualType(FnType, 0)); - else - MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); + llvm::Metadata 
*MD = + CGM.CreateMetadataIdentifierForFnType(QualType(FnType, 0)); llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a16dfb52f4d90..d45fb823d4c35 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -7934,6 +7934,13 @@ CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, return InternalId; } +llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForFnType(QualType T) { + assert(isa(T)); + if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) + return CreateMetadataIdentifierGeneralized(T); + return CreateMetadataIdentifierForType(T); +} + llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return CreateMetadataIdentifierImpl(T, MetadataIdMap, ""); } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index f62350fd8d378..8b1ac2d976c5e 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1623,6 +1623,9 @@ class CodeGenModule : public CodeGenTypeCache { /// Generate a KCFI type identifier for T. llvm::ConstantInt *CreateKCFITypeId(QualType T, StringRef Salt); + /// Create a metadata identifier for the given function type. + llvm::Metadata *CreateMetadataIdentifierForFnType(QualType T); + /// Create a metadata identifier for the given type. This may either be an /// MDString (for external identifiers) or a distinct unnamed MDNode (for /// internal identifiers). 
From 8ac67aa8a9ef0012a619e1395a23a04cbea3abe9 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 12 Sep 2025 16:38:21 -0700 Subject: [PATCH 196/734] [NFC][CFI][CodeGen] Move GeneralizeFunctionType out of CreateMetadataIdentifierGeneralized (#158190) For #158193 --- clang/lib/CodeGen/CodeGenModule.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d45fb823d4c35..a650f27f977c9 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3041,9 +3041,12 @@ void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD, if (isa(FD) && !cast(FD)->isStatic()) return; - llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); + QualType FnType = FD->getType(); + llvm::Metadata *MD = CreateMetadataIdentifierForType(FnType); F->addTypeMetadata(0, MD); - F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); + + QualType GenPtrFnType = GeneralizeFunctionType(getContext(), FD->getType()); + F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(GenPtrFnType)); // Emit a hash-based bit set entry for cross-DSO calls. 
if (CodeGenOpts.SanitizeCfiCrossDso) @@ -7936,8 +7939,10 @@ CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForFnType(QualType T) { assert(isa(T)); - if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) + if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) { + T = GeneralizeFunctionType(getContext(), T); return CreateMetadataIdentifierGeneralized(T); + } return CreateMetadataIdentifierForType(T); } @@ -7951,8 +7956,8 @@ CodeGenModule::CreateMetadataIdentifierForVirtualMemPtrType(QualType T) { } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) { - return CreateMetadataIdentifierImpl(GeneralizeFunctionType(getContext(), T), - GeneralizedMetadataIdMap, ".generalized"); + return CreateMetadataIdentifierImpl(T, GeneralizedMetadataIdMap, + ".generalized"); } /// Returns whether this module needs the "all-vtables" type identifier. From 120d7475d35fc16b25c9d7c9b05e0ba44cca6449 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Fri, 12 Sep 2025 16:38:29 -0700 Subject: [PATCH 197/734] [lldb] Change directory creation logic in framework-header-fix (#158355) It's possible for this logic to fail if the build system runs this script in parallel. One instance could create the directory in between another instance's checking of its existence and attempt at creation. Instead, always try to create it and ignore any FileExistsErrors. 
rdar://160120161 --- lldb/scripts/framework-header-fix.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/scripts/framework-header-fix.py b/lldb/scripts/framework-header-fix.py index 36c5c67c59d36..3447dfc29a761 100755 --- a/lldb/scripts/framework-header-fix.py +++ b/lldb/scripts/framework-header-fix.py @@ -115,8 +115,10 @@ def main(): unifdef_guards = ["-U" + guard for guard in args.unifdef_guards] # Create the framework's header dir if it doesn't already exist - if not os.path.exists(os.path.dirname(output_file_path)): + try: os.makedirs(os.path.dirname(output_file_path)) + except FileExistsError: + pass if framework_version == "lldb_main": modify_main_includes(input_file_path, output_file_path) From 9ac1f3420db82d7446449b8dd1e4ff07f93e7176 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram <96096277+nirvedhmeshram@users.noreply.github.com> Date: Fri, 12 Sep 2025 18:59:58 -0500 Subject: [PATCH 198/734] [Linalg] Fix bug in control function logic of push down extract pattern (#158348) Current logic just bails out if the first extract producer fails the control function, this PR fixes that. Signed-off-by: Nirvedh Meshram --- .../Transforms/DataLayoutPropagation.cpp | 36 ++++++++++++------- .../Linalg/data-layout-propagation.mlir | 30 ++++++++++++++++ .../Linalg/TestDataLayoutPropagation.cpp | 9 +++-- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp index ed2efd6fea5f7..6c17c3c2d0cab 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -1245,21 +1245,21 @@ struct SliceDimInfo { OpFoldResult outputSize; }; -/// Return the first input extract slice operand, if present, for the current +/// Return all extract slice operands, if present, for the current /// generic op. 
-static FailureOr getSliceOperand(GenericOp genericOp) { - OpOperand *sliceOperand = nullptr; +static FailureOr> +getSliceOperands(GenericOp genericOp) { + SmallVector sliceOperands; for (auto operand : genericOp.getDpsInputOperands()) { auto extractOp = operand->get().getDefiningOp(); if (!extractOp) continue; - sliceOperand = operand; - break; + sliceOperands.push_back(operand); } - if (!sliceOperand) { + if (sliceOperands.empty()) { return failure(); } - return sliceOperand; + return sliceOperands; } // Return a map of dims that have partial slices on them so that other operands @@ -1336,14 +1336,24 @@ pushDownExtractSliceOpThroughGenericOp(RewriterBase &rewriter, genericOp, "propagation through generic with gather semantics is unsupported."); // Collect the sliced operand, if present. - auto maybeSliceOperand = getSliceOperand(genericOp); - if (failed(maybeSliceOperand)) + auto maybeSliceOperands = getSliceOperands(genericOp); + if (failed(maybeSliceOperands)) return failure(); - OpOperand *sliceOperand = *maybeSliceOperand; - unsigned OperandIndex = sliceOperand->getOperandNumber(); - - if (!controlFn(sliceOperand)) + SmallVector sliceOperands = *maybeSliceOperands; + OpOperand *sliceOperand; + + bool foundValidOperand = false; + for (auto currSliceOperand : sliceOperands) { + if (controlFn(currSliceOperand)) { + sliceOperand = currSliceOperand; + foundValidOperand = true; + break; + } + } + if (!foundValidOperand) { return failure(); + } + unsigned OperandIndex = sliceOperand->getOperandNumber(); tensor::ExtractSliceOp producerSliceOp = sliceOperand->get().getDefiningOp(); diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index fb16e1e7dcda4..a5f8d63a3e912 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -1577,3 +1577,33 @@ func.func @push_extract_through_generic_rank0_operand(%arg0: tensor<128x128xf32> // 
CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK: %[[EXTRACT:.+]] = tensor.extract_slice %[[GENERIC]] // CHECK: return %[[EXTRACT]] + +// ----- +// Test that if one extract doesnt pass the control function which in this case is set to +// only allow extracts from the same block, then an extract from a later operand can still be pushed +// down. +func.func @push_extract_through_generic_secondextract(%arg0: tensor<128x128xf32>, %arg1: tensor, %arg2: index) -> tensor { + %c0 = arith.constant 0 : index + %c32 = arith.constant 32 : index + %extracted_slice1 = tensor.extract_slice %arg0[%arg2, %arg2] [%arg2, %arg2] [1, 1] : tensor<128x128xf32> to tensor + %for = scf.for %arg3 = %c0 to %c32 step %arg2 iter_args(%arg4 = %arg1) -> tensor { + %extracted_slice = tensor.extract_slice %arg0[%arg2, %arg2] [%arg2, %arg2] [1, 1] : tensor<128x128xf32> to tensor + %0 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0, d1)> ,affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice1, %extracted_slice : tensor, tensor) outs(%arg1 : tensor) { + ^bb0(%in: f32, %in_1 : f32, %out: bf16): + %1 = arith.truncf %in : f32 to bf16 + linalg.yield %1 : bf16 + } -> tensor + scf.yield %0 : tensor + } + return %for : tensor +} + +// CHECK-LABEL: func.func @push_extract_through_generic_secondextract +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[EXTRACT:.+]] = tensor.extract_slice +// CHECK: %[[FOR:.+]] = scf.for +// CHECK: %[[PAD:.+]] = tensor.pad %[[EXTRACT]] +// CHECK: %[[GENERIC:.+]] = linalg.generic +// CHECK-SAME: ins(%[[PAD]], %[[ARG0]] +// CHECK: %[[EXTRACT2:.+]] = tensor.extract_slice %[[GENERIC]] +// CHECK: scf.yield %[[EXTRACT2]] diff --git a/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp b/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp index 2cf25d8fc8c19..d332270468ea8 100644 --- a/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp +++ 
b/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp @@ -34,8 +34,13 @@ struct TestDataLayoutPropagationPass RewritePatternSet patterns(context); linalg::populateDataLayoutPropagationPatterns( patterns, [](OpOperand *opOperand) { return true; }); - linalg::populateExtractSliceSinkingPatterns( - patterns, [](OpOperand *opOperand) { return true; }); + linalg::ControlPropagationFn controlExtract = + [](OpOperand *opOperand) -> bool { + Operation *producer = opOperand->get().getDefiningOp(); + Operation *consumer = opOperand->getOwner(); + return consumer->getBlock() == producer->getBlock(); + }; + linalg::populateExtractSliceSinkingPatterns(patterns, controlExtract); if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) return signalPassFailure(); } From 1cbdb7370fd62b17762d1dfe19a471a70ae8b137 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 12 Sep 2025 14:32:12 -0700 Subject: [PATCH 199/734] Reapply "[lit] Implement ulimit builtin" This reverts commit 330068a74bfb6333f9016e3c4053eeaf4989d601. This was causing some test failures on MacOS that are now fixed in the reland. These failures were related to calling ulimit -v despite XNU not having support for that option. This patch simply disables the test on non-Linux platforms for now until we can have a Linux specific test for ulimit -v. 
--- llvm/utils/lit/lit/TestRunner.py | 38 ++++++++++++++++++- .../builtin_commands/_launch_with_limit.py | 25 ++++++++++++ .../lit/tests/Inputs/shtest-ulimit/lit.cfg | 8 ++++ .../Inputs/shtest-ulimit/print_limits.py | 4 ++ .../Inputs/shtest-ulimit/ulimit-bad-arg.txt | 1 + .../Inputs/shtest-ulimit/ulimit_okay.txt | 5 +++ llvm/utils/lit/tests/shtest-ulimit.py | 24 ++++++++++++ 7 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt create mode 100644 llvm/utils/lit/tests/shtest-ulimit.py diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index a769919558a47..90c2c6479b004 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -92,11 +92,12 @@ class ShellEnvironment(object): we maintain a dir stack for pushd/popd. 
""" - def __init__(self, cwd, env, umask=-1): + def __init__(self, cwd, env, umask=-1, ulimit={}): self.cwd = cwd self.env = dict(env) self.umask = umask self.dirStack = [] + self.ulimit = ulimit def change_dir(self, newdir): if os.path.isabs(newdir): @@ -595,6 +596,27 @@ def executeBuiltinUmask(cmd, shenv): return ShellCommandResult(cmd, "", "", 0, False) +def executeBuiltinUlimit(cmd, shenv): + """executeBuiltinUlimit - Change the current limits.""" + if os.name != "posix": + raise InternalShellError(cmd, "'ulimit' not supported on this system") + if len(cmd.args) != 3: + raise InternalShellError(cmd, "'ulimit' requires two arguments") + try: + new_limit = int(cmd.args[2]) + except ValueError as err: + raise InternalShellError(cmd, "Error: 'ulimit': %s" % str(err)) + if cmd.args[1] == "-v": + shenv.ulimit["RLIMIT_AS"] = new_limit * 1024 + elif cmd.args[1] == "-n": + shenv.ulimit["RLIMIT_NOFILE"] = new_limit + else: + raise InternalShellError( + cmd, "'ulimit' does not support option: %s" % cmd.args[1] + ) + return ShellCommandResult(cmd, "", "", 0, False) + + def executeBuiltinColon(cmd, cmd_shenv): """executeBuiltinColon - Discard arguments and exit with status 0.""" return ShellCommandResult(cmd, "", "", 0, False) @@ -749,6 +771,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): "popd": executeBuiltinPopd, "pushd": executeBuiltinPushd, "rm": executeBuiltinRm, + "ulimit": executeBuiltinUlimit, "umask": executeBuiltinUmask, ":": executeBuiltinColon, } @@ -914,6 +937,19 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): if kIsWindows: args = quote_windows_command(args) + # Handle any resource limits. We do this by launching the command with + # a wrapper that sets the necessary limits. We use a wrapper rather than + # setting the limits in process as we cannot reraise the limits back to + # their defaults without elevated permissions. 
+ if cmd_shenv.ulimit: + executable = sys.executable + args.insert(0, sys.executable) + args.insert(1, os.path.join(builtin_commands_dir, "_launch_with_limit.py")) + for limit in cmd_shenv.ulimit: + cmd_shenv.env["LIT_INTERNAL_ULIMIT_" + limit] = str( + cmd_shenv.ulimit[limit] + ) + try: # TODO(boomanaiden154): We currently wrap the subprocess.Popen with # os.umask as the umask argument in subprocess.Popen is not diff --git a/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py new file mode 100644 index 0000000000000..33d2d59ff0dbe --- /dev/null +++ b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py @@ -0,0 +1,25 @@ +import sys +import subprocess +import resource +import os + +ULIMIT_ENV_VAR_PREFIX = "LIT_INTERNAL_ULIMIT_" + + +def main(argv): + command_args = argv[1:] + for env_var in os.environ: + if env_var.startswith(ULIMIT_ENV_VAR_PREFIX): + limit_str = env_var[len(ULIMIT_ENV_VAR_PREFIX) :] + limit_value = int(os.environ[env_var]) + limit = (limit_value, limit_value) + if limit_str == "RLIMIT_AS": + resource.setrlimit(resource.RLIMIT_AS, limit) + elif limit_str == "RLIMIT_NOFILE": + resource.setrlimit(resource.RLIMIT_NOFILE, limit) + process_output = subprocess.run(command_args) + sys.exit(process_output.returncode) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg new file mode 100644 index 0000000000000..c7bdc7e7b6bc0 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/lit.cfg @@ -0,0 +1,8 @@ +import lit.formats + +config.name = "shtest-ulimit" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest(execute_external=False) +config.test_source_root = None +config.test_exec_root = None +config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py 
b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py new file mode 100644 index 0000000000000..632f954fa8fde --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py @@ -0,0 +1,4 @@ +import resource + +print("RLIMIT_AS=" + str(resource.getrlimit(resource.RLIMIT_AS)[0])) +print("RLIMIT_NOFILE=" + str(resource.getrlimit(resource.RLIMIT_NOFILE)[0])) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt new file mode 100644 index 0000000000000..efa22881047e9 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit-bad-arg.txt @@ -0,0 +1 @@ +# RUN: ulimit -n diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt new file mode 100644 index 0000000000000..ad353b5d7c459 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt @@ -0,0 +1,5 @@ +# RUN: ulimit -v 1048576 +# RUN: ulimit -n 50 +# RUN: %{python} %S/print_limits.py +# Fail the test so that we can assert on the output. +# RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py new file mode 100644 index 0000000000000..b86578a21f661 --- /dev/null +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -0,0 +1,24 @@ +# Check the ulimit command + +# ulimit does not work on non-POSIX platforms. +# UNSUPPORTED: system-windows + +# TODO(boomanaiden154): The test fails on some non-Linux POSIX +# platforms (like MacOS) due to the underlying system not supporting +# ulimit -v. This test needs to be carved up so we keep full test +# coverage on Linux and as much as possible on other platforms. 
+# REQUIRES: system-linux + +# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s + +# CHECK: -- Testing: 2 tests{{.*}} + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}}) +# CHECK: ulimit -n +# CHECK: 'ulimit' requires two arguments + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) +# CHECK: ulimit -v 1048576 +# CHECK: ulimit -n 50 +# CHECK: RLIMIT_AS=1073741824 +# CHECK: RLIMIT_NOFILE=50 From a5bff94ffd1b81a3562f02f05980ee87cc4164df Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 12 Sep 2025 17:13:35 -0700 Subject: [PATCH 200/734] [NFC][CodeGen][CFI] Add GeneralizePointers parameter to GeneralizeFunctionType (#158191) For #158193 --------- Co-authored-by: Alex Langford --- clang/lib/CodeGen/CodeGenModule.cpp | 44 +++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a650f27f977c9..d25ce3165bd79 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2339,12 +2339,15 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } -// Generalize pointer types to a void pointer with the qualifiers of the -// originally pointed-to type, e.g. 'const char *' and 'char * const *' -// generalize to 'const void *' while 'char *' and 'const char **' generalize to -// 'void *'. -static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) { - if (!Ty->isPointerType()) +// If `GeneralizePointers` is true, generalizes types to a void pointer with the +// qualifiers of the originally pointed-to type, e.g. 'const char *' and 'char * +// const *' generalize to 'const void *' while 'char *' and 'const char **' +// generalize to 'void *'. +static QualType GeneralizeType(ASTContext &Ctx, QualType Ty, + bool GeneralizePointers) { + // TODO: Add other generalizations. 
+ + if (!GeneralizePointers || !Ty->isPointerType()) return Ty; return Ctx.getPointerType( @@ -2353,26 +2356,29 @@ static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) { } // Apply type generalization to a FunctionType's return and argument types -static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) { +static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty, + bool GeneralizePointers) { if (auto *FnType = Ty->getAs()) { SmallVector GeneralizedParams; for (auto &Param : FnType->param_types()) - GeneralizedParams.push_back(GeneralizeType(Ctx, Param)); + GeneralizedParams.push_back( + GeneralizeType(Ctx, Param, GeneralizePointers)); - return Ctx.getFunctionType(GeneralizeType(Ctx, FnType->getReturnType()), - GeneralizedParams, FnType->getExtProtoInfo()); + return Ctx.getFunctionType( + GeneralizeType(Ctx, FnType->getReturnType(), GeneralizePointers), + GeneralizedParams, FnType->getExtProtoInfo()); } if (auto *FnType = Ty->getAs()) return Ctx.getFunctionNoProtoType( - GeneralizeType(Ctx, FnType->getReturnType())); + GeneralizeType(Ctx, FnType->getReturnType(), GeneralizePointers)); llvm_unreachable("Encountered unknown FunctionType"); } llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T, StringRef Salt) { - if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) - T = GeneralizeFunctionType(getContext(), T); + T = GeneralizeFunctionType( + getContext(), T, getCodeGenOpts().SanitizeCfiICallGeneralizePointers); if (auto *FnType = T->getAs()) T = getContext().getFunctionType( FnType->getReturnType(), FnType->getParamTypes(), @@ -3041,11 +3047,13 @@ void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD, if (isa(FD) && !cast(FD)->isStatic()) return; - QualType FnType = FD->getType(); + QualType FnType = GeneralizeFunctionType(getContext(), FD->getType(), + /*GeneralizePointers=*/false); llvm::Metadata *MD = CreateMetadataIdentifierForType(FnType); F->addTypeMetadata(0, MD); - QualType GenPtrFnType = 
GeneralizeFunctionType(getContext(), FD->getType()); + QualType GenPtrFnType = GeneralizeFunctionType(getContext(), FD->getType(), + /*GeneralizePointers=*/true); F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(GenPtrFnType)); // Emit a hash-based bit set entry for cross-DSO calls. @@ -7939,10 +7947,10 @@ CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForFnType(QualType T) { assert(isa(T)); - if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) { - T = GeneralizeFunctionType(getContext(), T); + T = GeneralizeFunctionType( + getContext(), T, getCodeGenOpts().SanitizeCfiICallGeneralizePointers); + if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) return CreateMetadataIdentifierGeneralized(T); - } return CreateMetadataIdentifierForType(T); } From ba3b3e3ac812ae30f12f92ee8c4a1c668cd9817e Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 12 Sep 2025 17:37:50 -0700 Subject: [PATCH 201/734] [NFC][CodeGen][CFI] Pre-commit transparent_union tests (#158192) For #158193 --- clang/test/CodeGen/cfi-icall-generalize.c | 16 ++++++++++++++++ clang/test/CodeGen/cfi-icall-normalize2.c | 14 ++++++++++++++ clang/test/CodeGen/kcfi-generalize.c | 16 ++++++++++++++++ clang/test/CodeGen/kcfi-normalize.c | 14 ++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/clang/test/CodeGen/cfi-icall-generalize.c b/clang/test/CodeGen/cfi-icall-generalize.c index 0af17e5760cc6..46d38511ba6b6 100644 --- a/clang/test/CodeGen/cfi-icall-generalize.c +++ b/clang/test/CodeGen/cfi-icall-generalize.c @@ -15,5 +15,21 @@ void g(int** (*fp)(const char *, const char **)) { fp(0, 0); } +union Union { + char *c; + long *n; +} __attribute__((transparent_union)); + +// CHECK: define{{.*}} void @uni({{.*}} !type [[TYPE2:![0-9]+]] !type [[TYPE2_GENERALIZED:![0-9]+]] +void uni(void (*fn)(union Union), union Union arg1) { + // UNGENERALIZED: call i1 @llvm.type.test(ptr {{.*}}, metadata !"_ZTSFv5UnionE") 
+ // GENERALIZED: call i1 @llvm.type.test(ptr {{.*}}, metadata !"_ZTSFv5UnionE.generalized") + fn(arg1); +} + // CHECK: [[TYPE]] = !{i64 0, !"_ZTSFPPiPKcPS2_E"} // CHECK: [[TYPE_GENERALIZED]] = !{i64 0, !"_ZTSFPvPKvS_E.generalized"} + +// CHECK: [[TYPE2]] = !{i64 0, !"_ZTSFvPFv5UnionES_E"} +// CHECK: [[TYPE2_GENERALIZED]] = !{i64 0, !"_ZTSFvPv5UnionE.generalized"} + diff --git a/clang/test/CodeGen/cfi-icall-normalize2.c b/clang/test/CodeGen/cfi-icall-normalize2.c index 93893065cf903..5e457dc97f0a2 100644 --- a/clang/test/CodeGen/cfi-icall-normalize2.c +++ b/clang/test/CodeGen/cfi-icall-normalize2.c @@ -24,6 +24,20 @@ void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) { fn(arg1, arg2, arg3); } +union Union { + char *c; + long *n; +} __attribute__((transparent_union)); + +void uni(void (*fn)(union Union), union Union arg1) { + // CHECK-LABEL: define{{.*}}uni + // CHECK-SAME: {{.*}}!type ![[TYPE4:[0-9]+]] !type !{{[0-9]+}} + // CHECK: call i1 @llvm.type.test({{i8\*|ptr}} {{%f|%0}}, metadata !"_ZTSFv5UnionE.normalized") + fn(arg1); +} + // CHECK: ![[TYPE1]] = !{i64 0, !"_ZTSFvPFvu3i32ES_E.normalized"} // CHECK: ![[TYPE2]] = !{i64 0, !"_ZTSFvPFvu3i32S_ES_S_E.normalized"} // CHECK: ![[TYPE3]] = !{i64 0, !"_ZTSFvPFvu3i32S_S_ES_S_S_E.normalized"} +// CHECK: ![[TYPE4]] = !{i64 0, !"_ZTSFvPFv5UnionES_E.normalized"} + diff --git a/clang/test/CodeGen/kcfi-generalize.c b/clang/test/CodeGen/kcfi-generalize.c index 4e32f4f35057c..864cdb8c2e092 100644 --- a/clang/test/CodeGen/kcfi-generalize.c +++ b/clang/test/CodeGen/kcfi-generalize.c @@ -26,8 +26,24 @@ void g(int** (*fp)(const char *, const char **)) { fp(0, 0); } +union Union { + char *c; + long *n; +} __attribute__((transparent_union)); + +// CHECK: define{{.*}} void @uni({{.*}} !kcfi_type [[TYPE4:![0-9]+]] +void uni(void (*fn)(union Union), union Union arg1) { + // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 -1037059548) ] + // GENERALIZED: call {{.*}} [ "kcfi"(i32 422130955) ] + fn(arg1); +} + // UNGENERALIZED: 
[[TYPE]] = !{i32 1296635908} // GENERALIZED: [[TYPE]] = !{i32 -49168686} // UNGENERALIZED: [[TYPE3]] = !{i32 874141567} // GENERALIZED: [[TYPE3]] = !{i32 954385378} + +// UNGENERALIZED: [[TYPE4]] = !{i32 981319178} +// GENERALIZED: [[TYPE4]] = !{i32 -1599950473} + diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c index b9150e88f6ab5..9291ff8529b31 100644 --- a/clang/test/CodeGen/kcfi-normalize.c +++ b/clang/test/CodeGen/kcfi-normalize.c @@ -28,7 +28,21 @@ void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) { fn(arg1, arg2, arg3); } +union Union { + char *c; + long *n; +} __attribute__((transparent_union)); + +void uni(void (*fn)(union Union), union Union arg1) { + // CHECK-LABEL: define{{.*}}uni + // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE4:[0-9]+]] + // CHECK: call void %0(ptr %1) [ "kcfi"(i32 -1430221633) ] + fn(arg1); +} + // CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1} // CHECK: ![[TYPE1]] = !{i32 -1143117868} // CHECK: ![[TYPE2]] = !{i32 -460921415} // CHECK: ![[TYPE3]] = !{i32 -333839615} +// CHECK: ![[TYPE4]] = !{i32 1766237188} + From 9af4a854602804430dc04766ce1be311259707d6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 13 Sep 2025 10:10:59 +0900 Subject: [PATCH 202/734] AMDGPU: Add test which shows unnecessary register alignment (#158168) The b96 tr loads are a special case that does not require even aligned VGPRs --- .../AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll | 66 +++++++++++++++++++ .../AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll | 54 +++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll index f504f2caa8632..3e96dfe40f745 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll @@ -158,3 +158,69 @@ entry: store <4 x bfloat> %val, ptr addrspace(1) %use ret void } + +; This is a 
special case that does not require aligned VGPRs. Make +; sure no copies are required for the unaligned ABI return value. +define { i32, <3 x i32> } @ds_read_b96_tr_b6_no_align2_requirement(ptr addrspace(3) %ptr) { +; GFX950-SDAG-LABEL: ds_read_b96_tr_b6_no_align2_requirement: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: ds_read_b96_tr_b6 v[2:4], v0 offset:32 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v3 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v4 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: ds_read_b96_tr_b6_no_align2_requirement: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: ds_read_b96_tr_b6 v[2:4], v0 offset:32 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: v_mov_b32_e32 v1, v2 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, v3 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v3, v4 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %val = call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32.p3(ptr addrspace(3) %gep) + %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0 + %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1 + ret { i32, <3 x i32> } %insert1 +} + +define void @ds_read_b96_tr_b6_no_align2_requirement_agpr(ptr addrspace(3) %ptr) { +; GFX950-SDAG-LABEL: ds_read_b96_tr_b6_no_align2_requirement_agpr: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: ds_read_b96_tr_b6 v[0:2], v0 offset:32 +; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-SDAG-NEXT: v_accvgpr_write_b32 a1, v0 +; GFX950-SDAG-NEXT: v_accvgpr_write_b32 a2, v1 +; GFX950-SDAG-NEXT: v_accvgpr_write_b32 a3, v2 +; GFX950-SDAG-NEXT: ;;#ASMSTART +; GFX950-SDAG-NEXT: 
; use a1 a2 a3 +; GFX950-SDAG-NEXT: ;;#ASMEND +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: ds_read_b96_tr_b6_no_align2_requirement_agpr: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: ds_read_b96_tr_b6 v[0:2], v0 offset:32 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: v_accvgpr_write_b32 a1, v0 +; GFX950-GISEL-NEXT: v_accvgpr_write_b32 a2, v1 +; GFX950-GISEL-NEXT: v_accvgpr_write_b32 a3, v2 +; GFX950-GISEL-NEXT: ;;#ASMSTART +; GFX950-GISEL-NEXT: ; use a1 a2 a3 +; GFX950-GISEL-NEXT: ;;#ASMEND +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %val = call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32.p3(ptr addrspace(3) %gep) + %val0 = extractelement <3 x i32> %val, i32 0 + %val1 = extractelement <3 x i32> %val, i32 1 + %val2 = extractelement <3 x i32> %val, i32 2 + call void asm sideeffect "; use $0 $1 $2", "{a1},{a2},{a3}"(i32 %val0, i32 %val1, i32 %val2) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll index d91b03ca4461d..d9f2fc55709a6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll @@ -320,3 +320,57 @@ entry: store <8 x bfloat> %val, ptr addrspace(1) %use ret void } + +; This is a special case that does not require aligned VGPRs. Make +; sure no copies are required for the unaligned ABI return value. 
+define { i32, <3 x i32> } @global_load_tr6_b96_vaddr_no_align2_requirement(ptr addrspace(1) %addr, ptr addrspace(1) %use) { +; GFX1250-LABEL: global_load_tr6_b96_vaddr_no_align2_requirement: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_tr6_b96 v[2:4], v[0:1], off offset:32 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2 +; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4 + %val = call <3 x i32> @llvm.amdgcn.global.load.tr6.b96.v3i32.p1(ptr addrspace(1) %gep) + %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0 + %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1 + ret { i32, <3 x i32> } %insert1 +} + +define { i32, <3 x i32> } @global_load_tr6_b96_saddr_no_align2_requirement(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use) { +; GFX1250-LABEL: global_load_tr6_b96_saddr_no_align2_requirement: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:32 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2 +; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4 + %val = call <3 x i32> @llvm.amdgcn.global.load.tr6.b96.v3i32.p1(ptr addrspace(1) %gep) + %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0 + %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1 + ret { i32, <3 x i32> } %insert1 +} + +define { i32, <3 x i32> } @ds_load_tr6_b96_no_align2_requirement(ptr addrspace(3) %addr, ptr addrspace(1) %use) { +; GFX1250-LABEL: ds_load_tr6_b96_no_align2_requirement: +; GFX1250: ; %bb.0: +; 
GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: ds_load_tr6_b96 v[2:4], v0 offset:32 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v2 +; GFX1250-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %val = call <3 x i32> @llvm.amdgcn.ds.load.tr6.b96.v3i32.p3(ptr addrspace(3) %gep) + %insert0 = insertvalue { i32, <3 x i32> } poison, i32 0, 0 + %insert1 = insertvalue { i32, <3 x i32> } %insert0, <3 x i32> %val, 1 + ret { i32, <3 x i32> } %insert1 +} From 1180c2ced008e33b0a4b2b91b3cb24724f06147c Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 12 Sep 2025 21:11:17 -0400 Subject: [PATCH 203/734] [AMDGPU] Support lowering of cluster related instrinsics (#157978) Since many code are connected, this also changes how workgroup id is lowered. Co-authored-by: Jay Foad Co-authored-by: Ivan Kosarev --- llvm/docs/AMDGPUUsage.rst | 7 + .../Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 8 + .../Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 19 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 221 ++- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 8 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 211 ++- llvm/lib/Target/AMDGPU/SIISelLowering.h | 9 + llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 +- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 2 + .../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 5 + llvm/lib/Target/AMDGPU/SOPInstructions.td | 19 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 48 + llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 44 + .../llvm.amdgcn.cluster.workgroup.id.ll | 1258 +++++++++++++++++ ...vm.amdgcn.cluster.workgroup.max.flat.id.ll | 194 +++ .../llvm.amdgcn.cluster.workgroup.max.id.ll | 1077 ++++++++++++++ .../lower-work-group-id-intrinsics-hsa.ll | 2 +- .../lower-work-group-id-intrinsics-opt.ll | 390 +++++ .../AMDGPU/lower-work-group-id-intrinsics.ll | 376 +++++ 
.../AMDGPU/reassoc-mul-add-1-to-mad.ll | 26 +- .../AMDGPU/workgroup-id-in-arch-sgprs.ll | 216 ++- 21 files changed, 4100 insertions(+), 43 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 37563203f2f83..cef87e077cc5c 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1812,6 +1812,13 @@ The AMDGPU backend supports the following LLVM IR attributes. offset by one less than the number of dynamic VGPR blocks required by the function encoded in bits 5..3. + "amdgpu-cluster-dims"="x,y,z" Specify the cluster workgroup dimensions. A value of "0,0,0" indicates that + cluster is disabled. A value of "1024,1024,1024" indicates that cluster is enabled, + but the dimensions cannot be determined at compile time. Any other value explicitly + specifies the cluster dimensions. + + This is only relevant on targets with cluster support. + ================================================ ========================================================== Calling Conventions diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp index d158f0f58d711..dda8033f47398 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp @@ -107,6 +107,14 @@ AMDGPUFunctionArgInfo::getPreloadedValue( case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: return std::tuple(WorkGroupIDZ ? 
&WorkGroupIDZ : nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID: + return std::tuple(nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); case AMDGPUFunctionArgInfo::LDS_KERNEL_ID: return std::tuple(LDSKernelId ? &LDSKernelId : nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index e07d47381ecca..1064e57b9da9e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -111,18 +111,25 @@ struct AMDGPUFunctionArgInfo { DISPATCH_ID = 4, FLAT_SCRATCH_INIT = 5, LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI - WORKGROUP_ID_X = 10, - WORKGROUP_ID_Y = 11, - WORKGROUP_ID_Z = 12, + WORKGROUP_ID_X = 10, // Also used for cluster ID X. + WORKGROUP_ID_Y = 11, // Also used for cluster ID Y. + WORKGROUP_ID_Z = 12, // Also used for cluster ID Z. 
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14, IMPLICIT_BUFFER_PTR = 15, IMPLICIT_ARG_PTR = 16, PRIVATE_SEGMENT_SIZE = 17, + CLUSTER_WORKGROUP_ID_X = 21, + CLUSTER_WORKGROUP_ID_Y = 22, + CLUSTER_WORKGROUP_ID_Z = 23, + CLUSTER_WORKGROUP_MAX_ID_X = 24, + CLUSTER_WORKGROUP_MAX_ID_Y = 25, + CLUSTER_WORKGROUP_MAX_ID_Z = 26, + CLUSTER_WORKGROUP_MAX_FLAT_ID = 27, // VGPRS: - WORKITEM_ID_X = 18, - WORKITEM_ID_Y = 19, - WORKITEM_ID_Z = 20, + WORKITEM_ID_X = 28, + WORKITEM_ID_Y = 29, + WORKITEM_ID_Z = 30, FIRST_VGPR_VALUE = WORKITEM_ID_X }; // clang-format on diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index f18536cd4ab93..d8c4cbbc4fa33 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4452,6 +4452,74 @@ void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg, } } +bool AMDGPULegalizerInfo::legalizeWorkGroupId( + MachineInstr &MI, MachineIRBuilder &B, + AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const { + Register DstReg = MI.getOperand(0).getReg(); + if (!ST.hasClusters()) { + if (!loadInputValue(DstReg, B, WorkGroupIdPV)) + return false; + MI.eraseFromParent(); + return true; + } + + // Clusters are supported. Return the global position in the grid. If clusters + // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID. + + // WorkGroupIdXYZ = ClusterId == 0 ? 
+ // ClusterIdXYZ : + // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ + MachineRegisterInfo &MRI = *B.getMRI(); + const LLT S32 = LLT::scalar(32); + Register ClusterIdXYZ = MRI.createGenericVirtualRegister(S32); + Register ClusterMaxIdXYZ = MRI.createGenericVirtualRegister(S32); + Register ClusterWorkGroupIdXYZ = MRI.createGenericVirtualRegister(S32); + if (!loadInputValue(ClusterIdXYZ, B, WorkGroupIdPV) || + !loadInputValue(ClusterWorkGroupIdXYZ, B, ClusterWorkGroupIdPV) || + !loadInputValue(ClusterMaxIdXYZ, B, ClusterMaxIdPV)) + return false; + + auto One = B.buildConstant(S32, 1); + auto ClusterSizeXYZ = B.buildAdd(S32, ClusterMaxIdXYZ, One); + auto GlobalIdXYZ = B.buildAdd(S32, ClusterWorkGroupIdXYZ, + B.buildMul(S32, ClusterIdXYZ, ClusterSizeXYZ)); + + const SIMachineFunctionInfo *MFI = B.getMF().getInfo(); + + switch (MFI->getClusterDims().getKind()) { + case AMDGPU::ClusterDimsAttr::Kind::FixedDims: + case AMDGPU::ClusterDimsAttr::Kind::VariableDims: { + B.buildCopy(DstReg, GlobalIdXYZ); + MI.eraseFromParent(); + return true; + } + case AMDGPU::ClusterDimsAttr::Kind::NoCluster: { + B.buildCopy(DstReg, ClusterIdXYZ); + MI.eraseFromParent(); + return true; + } + case AMDGPU::ClusterDimsAttr::Kind::Unknown: { + using namespace AMDGPU::Hwreg; + unsigned ClusterIdField = HwregEncoding::encode(ID_IB_STS2, 6, 4); + Register ClusterId = MRI.createGenericVirtualRegister(S32); + MRI.setRegClass(ClusterId, &AMDGPU::SReg_32RegClass); + B.buildInstr(AMDGPU::S_GETREG_B32_const) + .addDef(ClusterId) + .addImm(ClusterIdField); + auto Zero = B.buildConstant(S32, 0); + auto NoClusters = + B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), ClusterId, Zero); + B.buildSelect(DstReg, NoClusters, ClusterIdXYZ, GlobalIdXYZ); + MI.eraseFromParent(); + return true; + } + } + + llvm_unreachable("nothing should reach here"); +} + bool AMDGPULegalizerInfo::loadInputValue( Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { @@ -4471,8 
+4539,31 @@ bool AMDGPULegalizerInfo::loadInputValue( AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu); const ArgDescriptor WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); + const ArgDescriptor ClusterWorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu); + const ArgDescriptor ClusterWorkGroupIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u); + const ArgDescriptor ClusterWorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u); + const ArgDescriptor ClusterWorkGroupMaxIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u); + const ArgDescriptor ClusterWorkGroupMaxIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u); + const ArgDescriptor ClusterWorkGroupMaxIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u); + const ArgDescriptor ClusterWorkGroupMaxFlatID = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u); + + auto LoadConstant = [&](unsigned N) { + B.buildConstant(DstReg, N); + return true; + }; + if (ST.hasArchitectedSGPRs() && (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) { + AMDGPU::ClusterDimsAttr ClusterDims = MFI->getClusterDims(); + bool HasFixedDims = ClusterDims.isFixedDims(); + switch (ArgType) { case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: Arg = &WorkGroupIDX; @@ -4489,6 +4580,53 @@ bool AMDGPULegalizerInfo::loadInputValue( ArgRC = &AMDGPU::SReg_32RegClass; ArgTy = LLT::scalar(32); break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X: + if (HasFixedDims && ClusterDims.getDims()[0] == 1) + return LoadConstant(0); + Arg = &ClusterWorkGroupIDX; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y: + if (HasFixedDims && ClusterDims.getDims()[1] == 1) + return LoadConstant(0); + Arg = &ClusterWorkGroupIDY; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case 
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z: + if (HasFixedDims && ClusterDims.getDims()[2] == 1) + return LoadConstant(0); + Arg = &ClusterWorkGroupIDZ; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[0] - 1); + Arg = &ClusterWorkGroupMaxIDX; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[1] - 1); + Arg = &ClusterWorkGroupMaxIDY; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[2] - 1); + Arg = &ClusterWorkGroupMaxIDZ; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID: + Arg = &ClusterWorkGroupMaxFlatID; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; default: break; } @@ -4499,10 +4637,9 @@ bool AMDGPULegalizerInfo::loadInputValue( if (!Arg) { if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) { - // The intrinsic may appear when we have a 0 sized kernarg segment, in which - // case the pointer argument may be missing and we use null. - B.buildConstant(DstReg, 0); - return true; + // The intrinsic may appear when we have a 0 sized kernarg segment, in + // which case the pointer argument may be missing and we use null. 
+ return LoadConstant(0); } // It's undefined behavior if a function marked with the amdgpu-no-* @@ -7415,6 +7552,22 @@ bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeConstHwRegRead(MachineInstr &MI, + MachineIRBuilder &B, + AMDGPU::Hwreg::Id HwReg, + unsigned LowBit, + unsigned Width) const { + MachineRegisterInfo &MRI = *B.getMRI(); + Register DstReg = MI.getOperand(0).getReg(); + if (!MRI.getRegClassOrNull(DstReg)) + MRI.setRegClass(DstReg, &AMDGPU::SReg_32RegClass); + B.buildInstr(AMDGPU::S_GETREG_B32_const) + .addDef(DstReg) + .addImm(AMDGPU::Hwreg::HwregEncoding::encode(HwReg, LowBit, Width)); + MI.eraseFromParent(); + return true; +} + static constexpr unsigned FPEnvModeBitField = AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23); @@ -7577,14 +7730,64 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return legalizeWorkitemIDIntrinsic(MI, MRI, B, 2, AMDGPUFunctionArgInfo::WORKITEM_ID_Z); case Intrinsic::amdgcn_workgroup_id_x: - return legalizePreloadedArgIntrin(MI, MRI, B, - AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + return legalizeWorkGroupId( + MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X); case Intrinsic::amdgcn_workgroup_id_y: - return legalizePreloadedArgIntrin(MI, MRI, B, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + return legalizeWorkGroupId( + MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y); case Intrinsic::amdgcn_workgroup_id_z: - return legalizePreloadedArgIntrin(MI, MRI, B, + return legalizeWorkGroupId( + MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_id_x: + return ST.hasClusters() && + legalizePreloadedArgIntrin(MI, 
MRI, B, + AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + case Intrinsic::amdgcn_cluster_id_y: + return ST.hasClusters() && + legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + case Intrinsic::amdgcn_cluster_id_z: + return ST.hasClusters() && + legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_workgroup_id_x: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X); + case Intrinsic::amdgcn_cluster_workgroup_id_y: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y); + case Intrinsic::amdgcn_cluster_workgroup_id_z: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_workgroup_flat_id: + return ST.hasClusters() && + legalizeConstHwRegRead(MI, B, AMDGPU::Hwreg::ID_IB_STS2, 21, 4); + case Intrinsic::amdgcn_cluster_workgroup_max_id_x: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X); + case Intrinsic::amdgcn_cluster_workgroup_max_id_y: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y); + case Intrinsic::amdgcn_cluster_workgroup_max_id_z: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z); + case Intrinsic::amdgcn_cluster_workgroup_max_flat_id: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID); case Intrinsic::amdgcn_wave_id: return legalizeWaveID(MI, B); case Intrinsic::amdgcn_lds_kernel_id: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 1f4e02b0d600a..cd44a9ba0807c 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -114,6 +114,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { void buildLoadInputValue(Register DstReg, MachineIRBuilder &B, const ArgDescriptor *Arg, const TargetRegisterClass *ArgRC, LLT ArgTy) const; + bool legalizeWorkGroupId( + MachineInstr &MI, MachineIRBuilder &B, + AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const; bool loadInputValue(Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; @@ -218,6 +223,9 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeConstHwRegRead(MachineInstr &MI, MachineIRBuilder &B, + AMDGPU::Hwreg::Id HwReg, unsigned LowBit, + unsigned Width) const; bool legalizeGetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4927d2be67590..3332723b038f5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2443,6 +2443,53 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, return DAG.getMergeValues({ConvertedVal, ArgValue.getValue(1)}, SL); } +SDValue SITargetLowering::lowerWorkGroupId( + SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, + AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const { + if (!Subtarget->hasClusters()) + return getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV); + + // Clusters are supported. Return the global position in the grid. 
If clusters + // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID. + + // WorkGroupIdXYZ = ClusterId == 0 ? + // ClusterIdXYZ : + // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ + SDValue ClusterIdXYZ = getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV); + SDLoc SL(ClusterIdXYZ); + SDValue ClusterMaxIdXYZ = getPreloadedValue(DAG, MFI, VT, ClusterMaxIdPV); + SDValue One = DAG.getConstant(1, SL, VT); + SDValue ClusterSizeXYZ = DAG.getNode(ISD::ADD, SL, VT, ClusterMaxIdXYZ, One); + SDValue ClusterWorkGroupIdXYZ = + getPreloadedValue(DAG, MFI, VT, ClusterWorkGroupIdPV); + SDValue GlobalIdXYZ = + DAG.getNode(ISD::ADD, SL, VT, ClusterWorkGroupIdXYZ, + DAG.getNode(ISD::MUL, SL, VT, ClusterIdXYZ, ClusterSizeXYZ)); + + switch (MFI.getClusterDims().getKind()) { + case AMDGPU::ClusterDimsAttr::Kind::FixedDims: + case AMDGPU::ClusterDimsAttr::Kind::VariableDims: + return GlobalIdXYZ; + case AMDGPU::ClusterDimsAttr::Kind::NoCluster: + return ClusterIdXYZ; + case AMDGPU::ClusterDimsAttr::Kind::Unknown: { + using namespace AMDGPU::Hwreg; + SDValue ClusterIdField = + DAG.getTargetConstant(HwregEncoding::encode(ID_IB_STS2, 6, 4), SL, VT); + SDNode *GetReg = + DAG.getMachineNode(AMDGPU::S_GETREG_B32_const, SL, VT, ClusterIdField); + SDValue ClusterId(GetReg, 0); + SDValue Zero = DAG.getConstant(0, SL, VT); + return DAG.getNode(ISD::SELECT_CC, SL, VT, ClusterId, Zero, ClusterIdXYZ, + GlobalIdXYZ, DAG.getCondCode(ISD::SETEQ)); + } + } + + llvm_unreachable("nothing should reach here"); +} + SDValue SITargetLowering::getPreloadedValue( SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, AMDGPUFunctionArgInfo::PreloadedValue PVID) const { @@ -2461,9 +2508,30 @@ SDValue SITargetLowering::getPreloadedValue( AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? 
~0u : 0xFFFFu); const ArgDescriptor WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); + const ArgDescriptor ClusterWorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu); + const ArgDescriptor ClusterWorkGroupIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u); + const ArgDescriptor ClusterWorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u); + const ArgDescriptor ClusterWorkGroupMaxIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u); + const ArgDescriptor ClusterWorkGroupMaxIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u); + const ArgDescriptor ClusterWorkGroupMaxIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u); + const ArgDescriptor ClusterWorkGroupMaxFlatID = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u); + + auto LoadConstant = [&](unsigned N) { + return DAG.getConstant(N, SDLoc(), VT); + }; + if (Subtarget->hasArchitectedSGPRs() && - (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx || - CC == CallingConv::AMDGPU_Gfx_WholeWave)) { + (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) { + AMDGPU::ClusterDimsAttr ClusterDims = MFI.getClusterDims(); + bool HasFixedDims = ClusterDims.isFixedDims(); + switch (PVID) { case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: Reg = &WorkGroupIDX; @@ -2480,6 +2548,53 @@ SDValue SITargetLowering::getPreloadedValue( RC = &AMDGPU::SReg_32RegClass; Ty = LLT::scalar(32); break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X: + if (HasFixedDims && ClusterDims.getDims()[0] == 1) + return LoadConstant(0); + Reg = &ClusterWorkGroupIDX; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y: + if (HasFixedDims && ClusterDims.getDims()[1] == 1) + return LoadConstant(0); + Reg = &ClusterWorkGroupIDY; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z: + if 
(HasFixedDims && ClusterDims.getDims()[2] == 1) + return LoadConstant(0); + Reg = &ClusterWorkGroupIDZ; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[0] - 1); + Reg = &ClusterWorkGroupMaxIDX; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[1] - 1); + Reg = &ClusterWorkGroupMaxIDY; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[2] - 1); + Reg = &ClusterWorkGroupMaxIDZ; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID: + Reg = &ClusterWorkGroupMaxFlatID; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; default: break; } @@ -9539,6 +9654,19 @@ SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const { DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT)); } +SDValue SITargetLowering::lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op, + AMDGPU::Hwreg::Id HwReg, + unsigned LowBit, + unsigned Width) const { + SDLoc SL(Op); + using namespace AMDGPU::Hwreg; + return {DAG.getMachineNode( + AMDGPU::S_GETREG_B32_const, SL, MVT::i32, + DAG.getTargetConstant(HwregEncoding::encode(HwReg, LowBit, Width), + SL, MVT::i32)), + 0}; +} + SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &Arg) const { @@ -9685,14 +9813,81 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerImplicitZextParam(DAG, Op, MVT::i16, SI::KernelInputOffsets::LOCAL_SIZE_Z); case Intrinsic::amdgcn_workgroup_id_x: - return getPreloadedValue(DAG, *MFI, VT, - AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + return 
lowerWorkGroupId(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X); case Intrinsic::amdgcn_workgroup_id_y: - return getPreloadedValue(DAG, *MFI, VT, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + return lowerWorkGroupId(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y); case Intrinsic::amdgcn_workgroup_id_z: - return getPreloadedValue(DAG, *MFI, VT, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + return lowerWorkGroupId(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_id_x: + return Subtarget->hasClusters() + ? getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_X) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_id_y: + return Subtarget->hasClusters() + ? getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Y) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_id_z: + return Subtarget->hasClusters() + ? getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Z) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_id_x: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_id_y: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_id_z: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_flat_id: + return Subtarget->hasClusters() + ? 
lowerConstHwRegRead(DAG, Op, AMDGPU::Hwreg::ID_IB_STS2, 21, 4) + : SDValue(); + case Intrinsic::amdgcn_cluster_workgroup_max_id_x: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_max_id_y: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_max_id_z: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_max_flat_id: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID) + : DAG.getPOISON(VT); case Intrinsic::amdgcn_wave_id: return lowerWaveID(DAG, Op); case Intrinsic::amdgcn_lds_kernel_id: { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 9c26cfa44a83e..ba408a8f64540 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -16,6 +16,7 @@ #include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUISelLowering.h" +#include "SIDefines.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -64,6 +65,11 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, const SDLoc &SL, SDValue Chain, const ISD::InputArg &Arg) const; + SDValue lowerWorkGroupId( + SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, + AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const; SDValue getPreloadedValue(SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, @@ -84,6 +90,9 @@ class 
SITargetLowering final : public AMDGPUTargetLowering { unsigned NewOpcode) const; SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const; + SDValue lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op, + AMDGPU::Hwreg::Id HwReg, unsigned LowBit, + unsigned Width) const; SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &ArgDesc) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 24a20cc9dcf82..dffb3d7459e64 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -928,7 +928,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 || Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 || Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 || - Opcode == AMDGPU::S_GETREG_B32; + Opcode == AMDGPU::S_GETREG_B32 || + Opcode == AMDGPU::S_GETREG_B32_const; } /// \returns true if this is an s_store_dword* instruction. 
This is more diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 54426d33d3473..1f11be475e9f8 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -195,6 +195,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, VGPRForAGPRCopy = AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1); } + + ClusterDims = AMDGPU::ClusterDimsAttr::get(F); } MachineFunctionInfo *SIMachineFunctionInfo::clone( diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index ca8f8033a2d54..45606153db58e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -465,6 +465,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // Default/requested number of work groups for the function. SmallVector MaxNumWorkGroups = {0, 0, 0}; + // Requested cluster dimensions. + AMDGPU::ClusterDimsAttr ClusterDims; + private: unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; @@ -1207,6 +1210,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; } unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; } unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; } + + AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index fe94887cdff98..296ce5a46287c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1127,19 +1127,26 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo < "$sdst, $simm16" >; -// This is hasSideEffects to allow its use in readcyclecounter selection. // FIXME: Need to truncate immediate to 16-bits. 
-// FIXME: Should have separate pseudos for known may read MODE and -// only read MODE. -def S_GETREG_B32 : SOPK_Pseudo < +class S_GETREG_B32_Pseudo pattern=[]> : SOPK_Pseudo < "s_getreg_b32", (outs SReg_32:$sdst), (ins hwreg:$simm16), - "$sdst, $simm16", - [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> { + "$sdst, $simm16", pattern>; + +// This is hasSideEffects to allow its use in readcyclecounter selection. +// FIXME: Should have separate pseudos for known may read MODE and +// only read MODE. +def S_GETREG_B32 : S_GETREG_B32_Pseudo< + [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> { let hasSideEffects = 1; let Uses = [MODE]; } +// A version of the pseudo for reading hardware register fields that are +// known to remain the same during the course of the run. Has no side +// effects and doesn't read MODE. +def S_GETREG_B32_const : S_GETREG_B32_Pseudo; + let Defs = [MODE], Uses = [MODE] in { // FIXME: Need to truncate immediate to 16-bits. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 40da4f96aefdb..faae1fee342af 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3533,6 +3533,54 @@ bool isPackedFP32Inst(unsigned Opc) { } } +const std::array &ClusterDimsAttr::getDims() const { + assert(isFixedDims() && "expect kind to be FixedDims"); + return Dims; +} + +std::string ClusterDimsAttr::to_string() const { + SmallString<10> Buffer; + raw_svector_ostream OS(Buffer); + + switch (getKind()) { + case Kind::Unknown: + return ""; + case Kind::NoCluster: { + OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster; + return Buffer.c_str(); + } + case Kind::VariableDims: { + OS << EncoVariableDims << ',' << EncoVariableDims << ',' + << EncoVariableDims; + return Buffer.c_str(); + } + case Kind::FixedDims: { + OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2]; + return Buffer.c_str(); + } + } + 
llvm_unreachable("Unknown ClusterDimsAttr kind"); +} + +ClusterDimsAttr ClusterDimsAttr::get(const Function &F) { + std::optional> Attr = + getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3); + ClusterDimsAttr::Kind AttrKind = Kind::FixedDims; + + if (!Attr.has_value()) + AttrKind = Kind::Unknown; + else if (all_of(*Attr, [](unsigned V) { return V == EncoNoCluster; })) + AttrKind = Kind::NoCluster; + else if (all_of(*Attr, [](unsigned V) { return V == EncoVariableDims; })) + AttrKind = Kind::VariableDims; + + ClusterDimsAttr A(AttrKind); + if (AttrKind == Kind::FixedDims) + A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]}; + + return A; +} + } // namespace AMDGPU raw_ostream &operator<<(raw_ostream &OS, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 3fcd16f9290b1..3f8d43db5a48c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1813,6 +1813,50 @@ bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode); /// must be defined in terms of bytes. unsigned getLdsDwGranularity(const MCSubtargetInfo &ST); +class ClusterDimsAttr { +public: + enum class Kind { Unknown, NoCluster, VariableDims, FixedDims }; + + ClusterDimsAttr() = default; + + Kind getKind() const { return AttrKind; } + + bool isUnknown() const { return getKind() == Kind::Unknown; } + + bool isNoCluster() const { return getKind() == Kind::NoCluster; } + + bool isFixedDims() const { return getKind() == Kind::FixedDims; } + + bool isVariableedDims() const { return getKind() == Kind::VariableDims; } + + void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); } + + void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); } + + void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); } + + /// \returns the dims stored. Note that this function can only be called if + /// the kind is \p Fixed. 
+ const std::array &getDims() const; + + bool operator==(const ClusterDimsAttr &RHS) const { + return AttrKind == RHS.AttrKind && Dims == RHS.Dims; + } + + std::string to_string() const; + + static ClusterDimsAttr get(const Function &F); + +private: + enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 }; + + ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {} + + std::array Dims = {0, 0, 0}; + + Kind AttrKind = Kind::Unknown; +}; + } // end namespace AMDGPU raw_ostream &operator<<(raw_ostream &OS, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll new file mode 100644 index 0000000000000..aa3b7b3606fd8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll @@ -0,0 +1,1258 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s + +declare i32 @llvm.amdgcn.cluster.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.id.z() #0 + +define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_x: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; 
CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_x: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_x: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; 
CHECK-G-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1,2,2" { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_x_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_x_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; 
CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_x_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 
8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: 
debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_y: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_y: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; 
CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; 
CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_y: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: 
amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: 
is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_y_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: 
v_mov_b32_e32 v0, 0 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_y_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: 
enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_y_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: 
enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 
v0, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_z: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_z: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: 
enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: 
call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_z: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: 
enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; 
CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_flat_id(ptr addrspace(1) %out) { +; CHECK-UNKNOWN-LABEL: test_workgroup_flat_id: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_flat_id: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: 
kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: 
is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_flat_id: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_flat_id: +; CHECK-G-MESA3D: 
.amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.flat.id() + store i32 
%id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,2,1" { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_z_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_z_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: 
enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_z_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: 
enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; 
CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll new file mode 100644 index 0000000000000..afe37e371fbc3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s + +declare i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() #0 + +define amdgpu_kernel void @test_workgroup_max_flat_id(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_flat_id: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; 
CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_flat_id: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_flat_id: +; 
CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_flat_id: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception 
= 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; 
CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() + store i32 %id, ptr addrspace(1) %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll new file mode 100644 index 0000000000000..7ea4fa5373e57 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll @@ -0,0 +1,1077 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s + +declare i32 @llvm.amdgcn.cluster.workgroup.max.id.x() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.max.id.y() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.max.id.z() #0 + +define amdgpu_kernel void @test_workgroup_max_id_x(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-UNKNOWN-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_x: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: 
enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 
v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: 
enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; 
CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: 
enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: 
reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: 
debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; 
CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_y(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_y: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: 
amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; 
CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; 
CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 
@llvm.amdgcn.cluster.workgroup.max.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: 
enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: 
.end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: 
enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: 
group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_z(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_z: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; 
CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; 
CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: 
kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; 
CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; 
CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; 
CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 
0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll index 2554d99def57f..169a84ff1f86b 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll @@ -297,6 +297,6 @@ declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z() declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) -attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" } +attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; GFX9ARCH: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll new file mode 100644 index 0000000000000..69439d49e588f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll @@ -0,0 +1,390 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefix=GFX1250-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck -check-prefix=GFX1250-GISEL %s + +define void @test_workgroup_id_x_non_kernel(ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, ttmp9, s1 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; 
GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_x_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, 
off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_x_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, ttmp9 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, ttmp9 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_x_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel(ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: 
global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel_optimized_not_used(ptr addrspace(1) %out) 
"amdgpu-cluster-dims"="0,0,0" { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 
s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel(ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel_optimized_used(ptr addrspace(1) %out) 
"amdgpu-cluster-dims"="1024,1024,1024" { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX1250-SDAG-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 15 +; GFX1250-SDAG-NEXT: s_bfe_u32 s1, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_and_b32 s0, s0, 0x1fffe +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, s0, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; 
GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + + +declare i32 @llvm.amdgcn.workgroup.id.x() +declare i32 @llvm.amdgcn.workgroup.id.y() +declare i32 @llvm.amdgcn.workgroup.id.z() diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll new file mode 100644 index 0000000000000..497241cff392d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs < %s | FileCheck -check-prefix=GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel < %s | FileCheck -check-prefix=GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel < %s | FileCheck -check-prefix=GFX12-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -global-isel < %s | FileCheck -check-prefix=GFX1250-GISEL %s + +define amdgpu_cs void @_amdgpu_cs_main() { +; GFX9-SDAG-LABEL: _amdgpu_cs_main: +; GFX9-SDAG: ; %bb.0: ; %.entry +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: _amdgpu_cs_main: +; GFX9-GISEL: ; %bb.0: ; %.entry +; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; 
GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: _amdgpu_cs_main: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: _amdgpu_cs_main: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: _amdgpu_cs_main: +; GFX1250-SDAG: ; %bb.0: ; %.entry +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s3, s2 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_lshr_b32 s5, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, 1 +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s5, s4 +; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s5, s4 +; 
GFX1250-SDAG-NEXT: s_cselect_b32 s1, ttmp9, s1 +; GFX1250-SDAG-NEXT: s_cselect_b32 s2, s3, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: _amdgpu_cs_main: +; GFX1250-GISEL: ; %bb.0: ; %.entry +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s3, s1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s1 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s1, s3, s4 +; GFX1250-GISEL-NEXT: s_bfe_u32 s3, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s4, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s3, s4, s3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, s3 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s2, s4, s5 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-GISEL-NEXT: s_endpgm +.entry: + %idx 
= call i32 @llvm.amdgcn.workgroup.id.x() + %idy = call i32 @llvm.amdgcn.workgroup.id.y() + %idz = call i32 @llvm.amdgcn.workgroup.id.z() + %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0 + %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 + %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @workgroup_id_no_clusters() "amdgpu-cluster-dims"="0,0,0" { +; GFX9-SDAG-LABEL: workgroup_id_no_clusters: +; GFX9-SDAG: ; %bb.0: ; %.entry +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: workgroup_id_no_clusters: +; GFX9-GISEL: ; %bb.0: ; %.entry +; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: workgroup_id_no_clusters: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: workgroup_id_no_clusters: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: 
v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_no_clusters: +; GFX1250-SDAG: ; %bb.0: ; %.entry +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_no_clusters: +; GFX1250-GISEL: ; %bb.0: ; %.entry +; GFX1250-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-GISEL-NEXT: s_endpgm +.entry: + %idx = call i32 @llvm.amdgcn.workgroup.id.x() + %idy = call i32 @llvm.amdgcn.workgroup.id.y() + %idz = call i32 @llvm.amdgcn.workgroup.id.z() + %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0 + %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 + %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @workgroup_id_optimized() "amdgpu-cluster-dims"="2,3,4" { +; GFX9-SDAG-LABEL: workgroup_id_optimized: +; GFX9-SDAG: ; %bb.0: ; %.entry +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: workgroup_id_optimized: +; GFX9-GISEL: ; %bb.0: 
; %.entry +; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: workgroup_id_optimized: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: workgroup_id_optimized: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_optimized: +; GFX1250-SDAG: ; %bb.0: ; %.entry +; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 14 +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_and_b32 s0, s2, 0x3fffc +; GFX1250-SDAG-NEXT: s_and_b32 s2, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_bfe_u32 s3, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_mul_i32 s2, s2, 3 +; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s4 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s3 +; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null 
+; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_optimized: +; GFX1250-GISEL: ; %bb.0: ; %.entry +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, 3 +; GFX1250-GISEL-NEXT: s_lshr_b32 s3, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s2, s1 +; GFX1250-GISEL-NEXT: s_lshl2_add_u32 s2, s3, s4 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-GISEL-NEXT: s_endpgm +.entry: + %idx = call i32 @llvm.amdgcn.workgroup.id.x() + %idy = call i32 @llvm.amdgcn.workgroup.id.y() + %idz = call i32 @llvm.amdgcn.workgroup.id.z() + %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0 + %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 + %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @caller() { +; GFX9-SDAG-LABEL: caller: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9-SDAG-NEXT: s_mov_b32 s8, s0 +; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-SDAG-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9-SDAG-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0 +; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0 +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: caller: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: 
s_getpc_b64 s[8:9] +; GFX9-GISEL-NEXT: s_mov_b32 s8, s0 +; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-GISEL-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9-GISEL-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0 +; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0 +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: caller: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX12-SDAG-NEXT: s_wait_alu 0xfffe +; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: caller: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: caller: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s1 +; GFX1250-SDAG-NEXT: s_mov_b64 s[0:1], callee@abs64 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-SDAG-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; 
GFX1250-GISEL-LABEL: caller: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s1 +; GFX1250-GISEL-NEXT: s_mov_b64 s[0:1], callee@abs64 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm + %idx = call i32 @llvm.amdgcn.workgroup.id.x() + call amdgpu_gfx void @callee(i32 %idx) + ret void +} + +declare amdgpu_gfx void @callee(i32) + +declare i32 @llvm.amdgcn.workgroup.id.x() +declare i32 @llvm.amdgcn.workgroup.id.y() +declare i32 @llvm.amdgcn.workgroup.id.z() +declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll index 25609e881254e..b2bcb74e4184f 100644 --- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll +++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll @@ -4089,32 +4089,44 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) % ; GFX1250-NEXT: s_add_co_i32 s0, s10, 1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: v_mul_lo_u32 v1, s0, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1 ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_b32 s4, ttmp6, 15 +; GFX1250-NEXT: s_getreg_b32 s5, hwreg(HW_REG_IB_STS2, 6, 4) ; 
GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x4 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_bfe_u32 s3, ttmp6, 0x4000c ; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX1250-NEXT: s_add_co_i32 s3, s3, 1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_mul_i32 s3, ttmp9, s3 ; GFX1250-NEXT: v_add_nc_u32_e32 v1, v3, v1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_add_co_i32 s4, s4, s3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v2 ; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v3 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff +; GFX1250-NEXT: s_cmp_eq_u32 s5, 0 ; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v2 -; GFX1250-NEXT: v_mad_u32 v0, ttmp9, s2, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_cselect_b32 s3, ttmp9, s4 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mad_u32 v0, s3, s2, v0 ; GFX1250-NEXT: v_add_nc_u32_e32 v2, v3, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v1 ; GFX1250-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_mad_u32 v3, v2, v3, v2 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_lshl_add_u64 
v[0:1], v[0:1], 2, s[8:9] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1250-NEXT: v_mad_u32 v2, v3, v2, v3 ; GFX1250-NEXT: global_store_b32 v[0:1], v2, off ; GFX1250-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll index 7a64e55abb8d3..afca83a7e1c36 100644 --- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1200 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1200 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { ; @@ -15,6 +17,50 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm ; +; 
GFX1200-LABEL: workgroup_id_x: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1200-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_x: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_mul_i32 s2, ttmp9, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s2 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s3 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_x: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, 1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s2, ttmp9, s2 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, s2 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm ; GFX12-LABEL: workgroup_id_x: ; GFX12: ; %bb.0: 
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 @@ -41,6 +87,74 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace ; GFX9-NEXT: global_store_dword v1, v2, s[2:3] ; GFX9-NEXT: s_endpgm ; +; GFX1200-LABEL: workgroup_id_xy: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1200-NEXT: s_and_b32 s4, ttmp7, 0xffff +; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 +; GFX1200-NEXT: v_mov_b32_e32 v2, s4 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: s_clause 0x1 +; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1200-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_xy: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_and_b32 s4, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s7, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_mul_i32 s5, s4, s6 +; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, 1 +; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, s5 +; GFX1250-SDAG-NEXT: s_and_b32 s5, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_mul_i32 s7, ttmp9, s7 +; GFX1250-SDAG-NEXT: s_getreg_b32 s8, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s5, s5, s7 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s8, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s5, ttmp9, s5 +; GFX1250-SDAG-NEXT: s_cselect_b32 s4, s4, s6 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s5 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s4 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_clause 0x1 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_xy: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_bfe_u32 s6, ttmp6, 0x4000c +; 
GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s6, s6, 1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s4, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_mul_i32 s5, ttmp9, s6 +; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s5 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s4, ttmp9, s4 +; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s7, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s8, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s5, s7, s5 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX1250-GISEL-NEXT: s_add_co_i32 s8, s8, s5 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s4, s7, s8 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s4 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_clause 0x1 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1250-GISEL-NEXT: s_endpgm ; GFX12-LABEL: workgroup_id_xy: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 @@ -77,6 +191,99 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac ; GFX9-NEXT: global_store_dword v1, v0, s[4:5] ; GFX9-NEXT: s_endpgm ; +; GFX1200-LABEL: workgroup_id_xyz: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_clause 0x1 +; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1200-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 +; GFX1200-NEXT: s_and_b32 s6, ttmp7, 0xffff +; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 +; GFX1200-NEXT: s_lshr_b32 s7, ttmp7, 16 +; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1200-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: 
s_clause 0x2 +; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1200-NEXT: global_store_b32 v1, v3, s[4:5] +; GFX1200-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_xyz: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_lshr_b32 s6, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s0, 1 +; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 +; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_mul_i32 s7, s6, s7 +; GFX1250-SDAG-NEXT: s_bfe_u32 s8, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_and_b32 s10, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s11, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_add_co_i32 s8, s8, s7 +; GFX1250-SDAG-NEXT: s_mul_i32 s7, s10, s9 +; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s11, s11, 1 +; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, s7 +; GFX1250-SDAG-NEXT: s_and_b32 s7, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_mul_i32 s11, ttmp9, s11 +; GFX1250-SDAG-NEXT: s_getreg_b32 s12, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, s11 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s12, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s7, ttmp9, s7 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s7 +; GFX1250-SDAG-NEXT: s_cselect_b32 s7, s10, s9 +; GFX1250-SDAG-NEXT: s_cselect_b32 s6, s6, s8 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s6 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_clause 0x2 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3] +; GFX1250-SDAG-NEXT: global_store_b32 v0, v3, s[4:5] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: 
workgroup_id_xyz: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s7, ttmp9, s1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s8, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s10, s8, s0 +; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 +; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, s10 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s7 +; GFX1250-GISEL-NEXT: s_cselect_b32 s8, s8, s9 +; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s10, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s11, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s9, s10, s9 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s11, s11, s9 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s6, s10, s11 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v3, s6 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_clause 0x2 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1250-GISEL-NEXT: global_store_b32 v1, v3, s[4:5] +; GFX1250-GISEL-NEXT: s_endpgm ; GFX12-LABEL: workgroup_id_xyz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_clause 0x1 @@ -107,7 +314,6 @@ declare 
i32 @llvm.amdgcn.workgroup.id.x() declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z() ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX12-GISEL: {{.*}} -; GFX12-SDAG: {{.*}} +; GFX1250: {{.*}} ; GFX9-GISEL: {{.*}} ; GFX9-SDAG: {{.*}} From ffcaeca90a3c0965acace6645f775ab1d876fa6e Mon Sep 17 00:00:00 2001 From: Afanasyev Ivan Date: Sat, 13 Sep 2025 08:45:54 +0700 Subject: [PATCH 204/734] [CodeGen] Fix partial phi input removal in TailDuplicator. (#158265) Tail duplicator removes the first PHI income from the predecessor basic block, while it should remove all operands for this block. PHI instructions happen to have duplicated values for the same predecessor block: * `UnreachableMachineBlockElim` assumes that PHI instruction might have duplicates: https://github.com/llvm/llvm-project/blob/7289f2cd0c371b2539faa628ec0eea58fa61892c/llvm/lib/CodeGen/UnreachableBlockElim.cpp#L160 * `AArch64` directly states that PHI instruction might have duplicates: https://github.com/llvm/llvm-project/blob/7289f2cd0c371b2539faa628ec0eea58fa61892c/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp#L244 * And `Hexagon`: https://github.com/llvm/llvm-project/blob/7289f2cd0c371b2539faa628ec0eea58fa61892c/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp#L844 We have caught the bug on custom out-of-tree backend. `TailDuplicator` should remove all operands corresponding to the removing block. Please note, that bug likely does not affect in-tree backends, because: * It happens only in scenario of **partial** tail duplication (i.e. tail block is duplicated in some predecessors, but not in all of them) * It happens in **Pre-RA** tail duplication only (Post-RA does not contain PHIs, obviously) * The only backend (I know) uses Pre-RA tail duplication is X86. 
It uses tail duplication via `early-tailduplication` pass which declines partial tail duplication via `canCompletelyDuplicateBB` check, because it uses `TailDuplicator::tailDuplicateBlocks` public API. So, bug happens only in the case of pre-ra partial tail duplication if backend uses `TailDuplicator::tailDuplicate` public API directly. That's why I can not add reproducer test for in-tree backends. --- llvm/lib/CodeGen/TailDuplicator.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 5d720fbbf1c61..9b1420a94142d 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -375,9 +375,14 @@ void TailDuplicator::processPHI( if (!Remove) return; - // Remove PredBB from the PHI node. - MI->removeOperand(SrcOpIdx + 1); - MI->removeOperand(SrcOpIdx); + // MI might have multiple entries for PredBB. Need to remove them all. + for (unsigned N = MI->getNumOperands(); N > 2; N -= 2) { + if (MI->getOperand(N - 1).getMBB() == PredBB) { + MI->removeOperand(N - 1); + MI->removeOperand(N - 2); + } + } + if (MI->getNumOperands() == 1 && !TailBB->hasAddressTaken()) MI->eraseFromParent(); else if (MI->getNumOperands() == 1) From 7aad3830fb208771254b4ae63a01042744471091 Mon Sep 17 00:00:00 2001 From: lntue Date: Fri, 12 Sep 2025 21:49:34 -0400 Subject: [PATCH 205/734] [libc] Some MSVC compatibility changes for src/string/memory_utils. 
(#158393) --- libc/src/__support/endian_internal.h | 12 +++++++----- libc/src/__support/macros/config.h | 2 ++ libc/src/string/memory_utils/CMakeLists.txt | 1 + libc/src/string/memory_utils/op_generic.h | 11 +++++++++++ libc/src/string/memory_utils/op_x86.h | 10 ++++++++++ libc/src/string/memory_utils/utils.h | 5 +++++ libc/test/UnitTest/CMakeLists.txt | 1 + libc/test/UnitTest/LibcTest.h | 5 +++++ .../libc/test/UnitTest/BUILD.bazel | 1 + 9 files changed, 43 insertions(+), 5 deletions(-) diff --git a/libc/src/__support/endian_internal.h b/libc/src/__support/endian_internal.h index 4ac8709625d3a..07cde7b905c4d 100644 --- a/libc/src/__support/endian_internal.h +++ b/libc/src/__support/endian_internal.h @@ -35,7 +35,7 @@ template <> LIBC_INLINE uint16_t byte_swap(uint16_t value) { #if __has_builtin(__builtin_bswap16) return __builtin_bswap16(value); #else - return (v << 8) | (v >> 8); + return (value << 8) | (value >> 8); #endif // __builtin_bswap16 } @@ -43,8 +43,9 @@ template <> LIBC_INLINE uint32_t byte_swap(uint32_t value) { #if __has_builtin(__builtin_bswap32) return __builtin_bswap32(value); #else - return byte_swap(static_cast(v >> 16)) || - (static_cast(byte_swap(static_cast(v))) + return byte_swap(static_cast(value >> 16)) || + (static_cast( + byte_swap(static_cast(value))) << 16); #endif // __builtin_bswap64 } @@ -53,8 +54,9 @@ template <> LIBC_INLINE uint64_t byte_swap(uint64_t value) { #if __has_builtin(__builtin_bswap64) return __builtin_bswap64(value); #else - return byte_swap(static_cast(v >> 32)) || - (static_cast(byte_swap(static_cast(v))) + return byte_swap(static_cast(value >> 32)) || + (static_cast( + byte_swap(static_cast(value))) << 32); #endif // __builtin_bswap64 } diff --git a/libc/src/__support/macros/config.h b/libc/src/__support/macros/config.h index 501a816d49631..b06a890c9c13c 100644 --- a/libc/src/__support/macros/config.h +++ b/libc/src/__support/macros/config.h @@ -46,6 +46,8 @@ #define __builtin_expect(value, expectation) (value) 
#define __builtin_unreachable() __assume(0) +#define __builtin_prefetch(X, Y, Z) + #endif // LIBC_COMPILER_IS_MSVC #ifdef __clang__ diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt index 670db30129572..9cabfb9318012 100644 --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -42,6 +42,7 @@ add_header_library( libc.src.__support.macros.config libc.src.__support.macros.optimization libc.src.__support.macros.properties.architectures + libc.src.__support.macros.properties.compiler ) add_header_library( diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h index 37603410e3a51..010f2187a4ffd 100644 --- a/libc/src/string/memory_utils/op_generic.h +++ b/libc/src/string/memory_utils/op_generic.h @@ -31,6 +31,7 @@ #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL #include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/compiler.h" #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64 #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/utils.h" @@ -39,12 +40,22 @@ static_assert((UINTPTR_MAX == 4294967295U) || (UINTPTR_MAX == 18446744073709551615UL), "We currently only support 32- or 64-bit platforms"); +#ifdef LIBC_COMPILER_IS_MSVC + +namespace LIBC_NAMESPACE_DECL { +using generic_v128 = __m128i; +using generic_v256 = __m256i; +using generic_v512 = __m512i; +} // namespace LIBC_NAMESPACE_DECL + +#else namespace LIBC_NAMESPACE_DECL { // Compiler types using the vector attributes. 
using generic_v128 = uint8_t __attribute__((__vector_size__(16))); using generic_v256 = uint8_t __attribute__((__vector_size__(32))); using generic_v512 = uint8_t __attribute__((__vector_size__(64))); } // namespace LIBC_NAMESPACE_DECL +#endif // LIBC_COMPILER_IS_MSVC namespace LIBC_NAMESPACE_DECL { namespace generic { diff --git a/libc/src/string/memory_utils/op_x86.h b/libc/src/string/memory_utils/op_x86.h index 8bd84120c4ffa..1b4052747552d 100644 --- a/libc/src/string/memory_utils/op_x86.h +++ b/libc/src/string/memory_utils/op_x86.h @@ -15,6 +15,7 @@ #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL #include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" #if defined(LIBC_TARGET_ARCH_IS_X86) @@ -57,7 +58,12 @@ LIBC_INLINE_VAR constexpr bool K_AVX512_BW = LLVM_LIBC_IS_DEFINED(__AVX512BW__); // Memcpy repmovsb implementation struct Memcpy { LIBC_INLINE static void repmovsb(void *dst, const void *src, size_t count) { +#ifdef LIBC_COMPILER_IS_MSVC + __movsb(static_cast(dst), + static_cast(src), count); +#else asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); +#endif // LIBC_COMPILER_IS_MSVC } }; @@ -138,8 +144,10 @@ LIBC_INLINE MemcmpReturnType cmp_neq(CPtr p1, CPtr p2, // When we use these SIMD types in template specialization GCC complains: // "ignoring attributes on template argument ‘__m128i’ [-Wignored-attributes]" // Therefore, we disable this warning in this file. 
+#ifndef LIBC_COMPILER_IS_MSVC #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wignored-attributes" +#endif // !LIBC_COMPILER_IS_MSVC /////////////////////////////////////////////////////////////////////////////// // Specializations for __m128i @@ -366,7 +374,9 @@ LIBC_INLINE MemcmpReturnType cmp_neq<__m512i>(CPtr p1, CPtr p2, size_t offset) { } #endif // __AVX512BW__ +#ifndef LIBC_COMPILER_IS_MSVC #pragma GCC diagnostic pop +#endif // !LIBC_COMPILER_IS_MSVC } // namespace generic } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h index 0f9c9e36a3dcd..86ff4f12e8c26 100644 --- a/libc/src/string/memory_utils/utils.h +++ b/libc/src/string/memory_utils/utils.h @@ -17,6 +17,7 @@ #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL #include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" #include // size_t @@ -90,13 +91,17 @@ LIBC_INLINE void memcpy_inline(void *__restrict dst, // different value of the Size parameter. This doesn't play well with GCC's // Value Range Analysis that wrongly detects out of bounds accesses. We // disable these warnings for the purpose of this function. 
+#ifndef LIBC_COMPILER_IS_MSVC #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Warray-bounds" #pragma GCC diagnostic ignored "-Wstringop-overread" #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif // !LIBC_COMPILER_IS_MSVC for (size_t i = 0; i < Size; ++i) static_cast(dst)[i] = static_cast(src)[i]; +#ifndef LIBC_COMPILER_IS_MSVC #pragma GCC diagnostic pop +#endif // !LIBC_COMPILER_IS_MSVC #endif } diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index f1a83fc601e5e..31d1e9dce8204 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -76,6 +76,7 @@ add_unittest_framework_library( libc.src.__support.CPP.string_view libc.src.__support.CPP.type_traits libc.src.__support.fixed_point.fx_rep + libc.src.__support.macros.properties.compiler libc.src.__support.macros.properties.types libc.src.__support.OSUtil.osutil libc.src.__support.uint128 diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h index fbeafd0bacb75..cf098cdd7a49a 100644 --- a/libc/test/UnitTest/LibcTest.h +++ b/libc/test/UnitTest/LibcTest.h @@ -30,6 +30,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/c_string.h" +#include "src/__support/macros/properties/compiler.h" #include "test/UnitTest/ExecuteFunction.h" #include "test/UnitTest/TestLogger.h" @@ -260,7 +261,11 @@ constexpr char const *GetPrettyFunctionParamType(char const *str) { // This function recovers ParamType at compile time by using __PRETTY_FUNCTION__ // It can be customized by using the REGISTER_TYPE_NAME macro below. 
template static constexpr const char *GetTypeName() { +#ifdef LIBC_COMPILER_IS_MSVC + return GetPrettyFunctionParamType(__FUNCSIG__); +#else return GetPrettyFunctionParamType(__PRETTY_FUNCTION__); +#endif // LIBC_COMPILER_IS_MSVC } template diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index 24baaf1983a08..318397615d0e3 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -62,6 +62,7 @@ libc_test_library( "//libc:__support_libc_errno", "//libc:__support_macros_config", "//libc:__support_macros_properties_architectures", + "//libc:__support_macros_properties_compiler", "//libc:__support_macros_properties_types", "//libc:__support_stringutil", "//libc:__support_uint128", From 0ca54d7738103f5ff352f7194b34a11aa4d5aea0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 12 Sep 2025 18:54:48 -0700 Subject: [PATCH 206/734] [LegalizeTypes] Use getShiftAmountConstant in SplitInteger. (#158392) This function contained old code for handling the case that the type returned getScalarShiftAmountTy can't hold the shift amount. These days this is handled by getShiftAmountTy which is used by getShiftAmountConstant. 
--- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 83fade45d1892..cc0fd7993916c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1026,14 +1026,9 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == Op.getValueSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); - unsigned ReqShiftAmountInBits = - Log2_32_Ceil(Op.getValueType().getSizeInBits()); - MVT ShiftAmountTy = - TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType()); - if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits()) - ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits)); - Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, - DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy)); + Hi = DAG.getNode( + ISD::SRL, dl, Op.getValueType(), Op, + DAG.getShiftAmountConstant(LoVT.getSizeInBits(), Op.getValueType(), dl)); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } From 004f209199d53a0c7a00ca7af4446407da4c9fb1 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 12 Sep 2025 19:18:58 -0700 Subject: [PATCH 207/734] [CodeGen][CFI] Generalize transparent union parameters (#158193) According GCC documentation transparent union calling convention is the same as the type of the first member of the union. C++ ignores attribute. Note, it does not generalize args of function pointer args. It's unnecessary with pointer generalization. It will be fixed in followup patch. 
--------- Co-authored-by: lntue --- clang/lib/CodeGen/CodeGenModule.cpp | 15 ++++++++++++++- clang/test/CodeGen/cfi-icall-generalize.c | 8 ++++---- clang/test/CodeGen/cfi-icall-normalize2.c | 4 ++-- clang/test/CodeGen/kcfi-generalize.c | 9 ++++----- clang/test/CodeGen/kcfi-normalize.c | 11 ++++++----- 5 files changed, 30 insertions(+), 17 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d25ce3165bd79..0ebab141b187d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2339,13 +2339,26 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } +static QualType GeneralizeTransparentUnion(QualType Ty) { + const RecordType *UT = Ty->getAsUnionType(); + if (!UT) + return Ty; + const RecordDecl *UD = UT->getOriginalDecl()->getDefinitionOrSelf(); + if (!UD->hasAttr()) + return Ty; + for (const auto *it : UD->fields()) { + return it->getType(); + } + return Ty; +} + // If `GeneralizePointers` is true, generalizes types to a void pointer with the // qualifiers of the originally pointed-to type, e.g. 'const char *' and 'char * // const *' generalize to 'const void *' while 'char *' and 'const char **' // generalize to 'void *'. static QualType GeneralizeType(ASTContext &Ctx, QualType Ty, bool GeneralizePointers) { - // TODO: Add other generalizations. 
+ Ty = GeneralizeTransparentUnion(Ty); if (!GeneralizePointers || !Ty->isPointerType()) return Ty; diff --git a/clang/test/CodeGen/cfi-icall-generalize.c b/clang/test/CodeGen/cfi-icall-generalize.c index 46d38511ba6b6..5995540ba33fb 100644 --- a/clang/test/CodeGen/cfi-icall-generalize.c +++ b/clang/test/CodeGen/cfi-icall-generalize.c @@ -22,14 +22,14 @@ union Union { // CHECK: define{{.*}} void @uni({{.*}} !type [[TYPE2:![0-9]+]] !type [[TYPE2_GENERALIZED:![0-9]+]] void uni(void (*fn)(union Union), union Union arg1) { - // UNGENERALIZED: call i1 @llvm.type.test(ptr {{.*}}, metadata !"_ZTSFv5UnionE") - // GENERALIZED: call i1 @llvm.type.test(ptr {{.*}}, metadata !"_ZTSFv5UnionE.generalized") + // UNGENERALIZED: call i1 @llvm.type.test(ptr {{.*}}, metadata !"_ZTSFvPcE") + // GENERALIZED: call i1 @llvm.type.test(ptr {{.*}}, metadata !"_ZTSFvPvE.generalized") fn(arg1); } // CHECK: [[TYPE]] = !{i64 0, !"_ZTSFPPiPKcPS2_E"} // CHECK: [[TYPE_GENERALIZED]] = !{i64 0, !"_ZTSFPvPKvS_E.generalized"} -// CHECK: [[TYPE2]] = !{i64 0, !"_ZTSFvPFv5UnionES_E"} -// CHECK: [[TYPE2_GENERALIZED]] = !{i64 0, !"_ZTSFvPv5UnionE.generalized"} +// CHECK: [[TYPE2]] = !{i64 0, !"_ZTSFvPFv5UnionEPcE"} +// CHECK: [[TYPE2_GENERALIZED]] = !{i64 0, !"_ZTSFvPvS_E.generalized"} diff --git a/clang/test/CodeGen/cfi-icall-normalize2.c b/clang/test/CodeGen/cfi-icall-normalize2.c index 5e457dc97f0a2..9fa6f95e523d7 100644 --- a/clang/test/CodeGen/cfi-icall-normalize2.c +++ b/clang/test/CodeGen/cfi-icall-normalize2.c @@ -32,12 +32,12 @@ union Union { void uni(void (*fn)(union Union), union Union arg1) { // CHECK-LABEL: define{{.*}}uni // CHECK-SAME: {{.*}}!type ![[TYPE4:[0-9]+]] !type !{{[0-9]+}} - // CHECK: call i1 @llvm.type.test({{i8\*|ptr}} {{%f|%0}}, metadata !"_ZTSFv5UnionE.normalized") + // CHECK: call i1 @llvm.type.test({{i8\*|ptr}} {{%f|%0}}, metadata !"_ZTSFvPu2i8E.normalized") fn(arg1); } // CHECK: ![[TYPE1]] = !{i64 0, !"_ZTSFvPFvu3i32ES_E.normalized"} // CHECK: ![[TYPE2]] = !{i64 0, 
!"_ZTSFvPFvu3i32S_ES_S_E.normalized"} // CHECK: ![[TYPE3]] = !{i64 0, !"_ZTSFvPFvu3i32S_S_ES_S_S_E.normalized"} -// CHECK: ![[TYPE4]] = !{i64 0, !"_ZTSFvPFv5UnionES_E.normalized"} +// CHECK: ![[TYPE4]] = !{i64 0, !"_ZTSFvPFv5UnionEPu2i8E.normalized"} diff --git a/clang/test/CodeGen/kcfi-generalize.c b/clang/test/CodeGen/kcfi-generalize.c index 864cdb8c2e092..5a44d97412af9 100644 --- a/clang/test/CodeGen/kcfi-generalize.c +++ b/clang/test/CodeGen/kcfi-generalize.c @@ -33,8 +33,8 @@ union Union { // CHECK: define{{.*}} void @uni({{.*}} !kcfi_type [[TYPE4:![0-9]+]] void uni(void (*fn)(union Union), union Union arg1) { - // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 -1037059548) ] - // GENERALIZED: call {{.*}} [ "kcfi"(i32 422130955) ] + // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 -587217045) ] + // GENERALIZED: call {{.*}} [ "kcfi"(i32 2139530422) ] fn(arg1); } @@ -44,6 +44,5 @@ void uni(void (*fn)(union Union), union Union arg1) { // UNGENERALIZED: [[TYPE3]] = !{i32 874141567} // GENERALIZED: [[TYPE3]] = !{i32 954385378} -// UNGENERALIZED: [[TYPE4]] = !{i32 981319178} -// GENERALIZED: [[TYPE4]] = !{i32 -1599950473} - +// UNGENERALIZED: [[TYPE4]] = !{i32 -1619636625} +// GENERALIZED: [[TYPE4]] = !{i32 -125078496} diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c index 9291ff8529b31..bd87f4af534a1 100644 --- a/clang/test/CodeGen/kcfi-normalize.c +++ b/clang/test/CodeGen/kcfi-normalize.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers -x c++ -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers -o - %s | FileCheck %s --check-prefixes=CHECK,C +// RUN: %clang_cc1 -triple 
x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers -x c++ -o - %s | FileCheck %s --check-prefixes=CHECK,CPP #if !__has_feature(kcfi) #error Missing kcfi? #endif @@ -36,7 +36,8 @@ union Union { void uni(void (*fn)(union Union), union Union arg1) { // CHECK-LABEL: define{{.*}}uni // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE4:[0-9]+]] - // CHECK: call void %0(ptr %1) [ "kcfi"(i32 -1430221633) ] + // C: call void %0(ptr %1) [ "kcfi"(i32 1819770848) ] + // CPP: call void %0(ptr %1) [ "kcfi"(i32 -1430221633) ] fn(arg1); } @@ -44,5 +45,5 @@ void uni(void (*fn)(union Union), union Union arg1) { // CHECK: ![[TYPE1]] = !{i32 -1143117868} // CHECK: ![[TYPE2]] = !{i32 -460921415} // CHECK: ![[TYPE3]] = !{i32 -333839615} -// CHECK: ![[TYPE4]] = !{i32 1766237188} - +// C: ![[TYPE4]] = !{i32 -650530463} +// CPP: ![[TYPE4]] = !{i32 1766237188} From 4ebd2023291d47402ecd170864df9ea541ea33ba Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 12 Sep 2025 19:49:29 -0700 Subject: [PATCH 208/734] [LegalizeTypes][X86] Use getShiftAmountConstant in ExpandIntRes_SIGN_EXTEND. (#158388) This ensures we don't need to fixup the shift amount later. Unfortunately, this enabled the (SRA (SHL X, ShlConst), SraConst) -> (SRA (sext_in_reg X), SraConst - ShlConst) combine in combineShiftRightArithmetic for some cases in is_fpclass-fp80.ll. So we need to also update checkSignTestSetCCCombine to look through sign_extend_inreg to prevent a regression. 
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 9 ++++----- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 87570e6f44a6f..5967b4eb3769a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -5088,9 +5088,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); // The high part is obtained by SRA'ing all but one of the bits of low part. unsigned LoSize = NVT.getSizeInBits(); - Hi = DAG.getNode( - ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getShiftAmountConstant(LoSize - 1, NVT, dl)); } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -5123,8 +5122,8 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // The high part gets the sign extension from the lo-part. This handles // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, - DAG.getConstant(Hi.getValueSizeInBits() - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getShiftAmountConstant(Hi.getValueSizeInBits() - 1, + Hi.getValueType(), dl)); } else { // For example, extension of an i48 to an i64. Leave the low part alone, // sext_inreg the high part. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3631016b0f5c7..eeb5eb8a262de 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48396,13 +48396,17 @@ static SDValue checkSignTestSetCCCombine(SDValue Cmp, X86::CondCode &CC, MVT SrcVT = Src.getSimpleValueType(); APInt BitMask = APInt::getSignMask(SrcVT.getScalarSizeInBits()); - // If Src came from a SHL (probably from an expanded SIGN_EXTEND_INREG), then - // peek through and adjust the TEST bit. + // If Src came from a SIGN_EXTEND_INREG or SHL (probably from an expanded + // SIGN_EXTEND_INREG), then peek through and adjust the TEST bit. if (Src.getOpcode() == ISD::SHL) { if (std::optional ShiftAmt = DAG.getValidShiftAmount(Src)) { Src = Src.getOperand(0); BitMask.lshrInPlace(*ShiftAmt); } + } else if (Src.getOpcode() == ISD::SIGN_EXTEND_INREG) { + EVT ExtVT = cast(Src.getOperand(1))->getVT(); + Src = Src.getOperand(0); + BitMask.lshrInPlace(BitMask.getBitWidth() - ExtVT.getScalarSizeInBits()); } SDValue Mask = DAG.getNode(ISD::AND, DL, SrcVT, Src, From 4cbf4408e7d27786490bae933e45e1c3fe2011ec Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 12 Sep 2025 19:49:48 -0700 Subject: [PATCH 209/734] [SelectionDAG] Use getShiftAmountConstant. (#158395) Many of the shifts in LegalizeIntegerTypes.cpp were using getPointerTy. 
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 67 +++++++------------ .../SelectionDAG/LegalizeIntegerTypes.cpp | 44 ++++++------ .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 4 +- 4 files changed, 46 insertions(+), 72 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bcfc2c5dc9f83..5fb7e63cfb605 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -585,8 +585,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); + DAG.getShiftAmountConstant(RoundWidth, Value.getValueType(), dl)); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, ST->getBaseAlign(), MMOFlags, AAInfo); @@ -596,8 +595,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the top RoundWidth bits. Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); + DAG.getShiftAmountConstant(ExtraWidth, Value.getValueType(), dl)); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, ST->getBaseAlign(), MMOFlags, AAInfo); @@ -816,8 +814,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode( ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Hi.getValueType(), DL))); + DAG.getShiftAmountConstant(RoundWidth, Hi.getValueType(), dl)); // Join the hi and lo parts. 
Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -845,8 +842,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode( ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Hi.getValueType(), DL))); + DAG.getShiftAmountConstant(ExtraWidth, Hi.getValueType(), dl)); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -2767,8 +2763,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, SDValue SignBitTest = DAG.getSetCC( dl, SetCCVT, Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); - EVT ShiftVT = TLI.getShiftAmountTy(SrcVT, DAG.getDataLayout()); - SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue ShiftConst = DAG.getShiftAmountConstant(1, SrcVT, dl); SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Op0, ShiftConst); SDValue AndConst = DAG.getConstant(1, dl, SrcVT); SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Op0, AndConst); @@ -3350,10 +3345,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Op = DAG.getAnyExtOrTrunc(Op, dl, MVT::i32); } - Op = DAG.getNode( - ISD::SHL, dl, MVT::i32, Op, - DAG.getConstant(16, dl, - TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); + Op = DAG.getNode(ISD::SHL, dl, MVT::i32, Op, + DAG.getShiftAmountConstant(16, MVT::i32, dl)); Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op); // Add fp_extend in case the output is bigger than f32. 
if (Node->getValueType(0) != MVT::f32) @@ -3370,10 +3363,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (!DAG.isKnownNeverSNaN(Op)) { Op = DAG.getNode(ISD::FCANONICALIZE, dl, MVT::f32, Op, Node->getFlags()); } - Op = DAG.getNode( - ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op), - DAG.getConstant(16, dl, - TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); + Op = DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op), + DAG.getShiftAmountConstant(16, MVT::i32, dl)); // The result of this node can be bf16 or an integer type in case bf16 is // not supported on the target and was softened to i16 for storage. if (Node->getValueType(0) == MVT::bf16) { @@ -3431,13 +3423,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // NOTE: we could fall back on load/store here too for targets without // SRA. However, it is doubtful that any exist. - EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned BitsDiff = VT.getScalarSizeInBits() - ExtraVT.getScalarSizeInBits(); - SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); - Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), - Node->getOperand(0), ShiftCst); - Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + SDValue ShiftCst = DAG.getShiftAmountConstant(BitsDiff, VT, dl); + Tmp1 = DAG.getNode(ISD::SHL, dl, VT, Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, VT, Tmp1, ShiftCst); Results.push_back(Tmp1); break; } @@ -3666,11 +3656,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT OpTy = Node->getOperand(0).getValueType(); if (Node->getConstantOperandVal(1)) { // 1 -> Hi - Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), - DAG.getConstant(OpTy.getSizeInBits() / 2, dl, - TLI.getShiftAmountTy( - Node->getOperand(0).getValueType(), - DAG.getDataLayout()))); + Tmp1 = DAG.getNode( + ISD::SRL, dl, OpTy, Node->getOperand(0), + 
DAG.getShiftAmountConstant(OpTy.getSizeInBits() / 2, OpTy, dl)); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { // 0 -> Lo @@ -3950,9 +3938,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned i = 0; i < 2; ++i) { SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]); SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]); - SDValue Shift = DAG.getConstant( - HalfType.getScalarSizeInBits(), dl, - TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); + SDValue Shift = + DAG.getShiftAmountConstant(HalfType.getScalarSizeInBits(), VT, dl); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); } @@ -3999,8 +3986,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); SDValue Shift = - DAG.getConstant(HalfType.getSizeInBits(), dl, - TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); + DAG.getShiftAmountConstant(HalfType.getSizeInBits(), VT, dl); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); } @@ -4130,8 +4116,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode( ISD::SHL, dl, PairTy, Tmp2, - DAG.getConstant(PairTy.getSizeInBits() / 2, dl, - TLI.getShiftAmountTy(PairTy, DAG.getDataLayout()))); + DAG.getShiftAmountConstant(PairTy.getSizeInBits() / 2, PairTy, dl)); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; } @@ -5368,10 +5353,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); - Tmp1 = DAG.getNode( - ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, dl, - 
TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); + Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, + DAG.getShiftAmountConstant(DiffBits, NVT, dl)); Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; @@ -5483,11 +5466,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2); - auto &DL = DAG.getDataLayout(); unsigned OriginalSize = OVT.getScalarSizeInBits(); - Tmp2 = DAG.getNode( - ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT))); + Tmp2 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, + DAG.getShiftAmountConstant(OriginalSize, NVT, dl)); Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2)); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5967b4eb3769a..354aeff0c60ea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1938,9 +1938,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { for (unsigned i = 1; i < NumRegs; ++i) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); // Shift it to the right position and "or" it in. 
- Part = DAG.getNode(ISD::SHL, dl, NVT, Part, - DAG.getConstant(i * RegVT.getSizeInBits(), dl, - TLI.getPointerTy(DAG.getDataLayout()))); + Part = DAG.getNode( + ISD::SHL, dl, NVT, Part, + DAG.getShiftAmountConstant(i * RegVT.getSizeInBits(), NVT, dl)); Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); } @@ -2293,9 +2293,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); SDLoc dl(N); - Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, - DAG.getConstant(OVT.getSizeInBits(), dl, - TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode( + ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getShiftAmountConstant(OVT.getSizeInBits(), N->getValueType(0), dl)); return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); } @@ -3943,8 +3943,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NVTBits - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getShiftAmountConstant(NVTBits - 1, NVT, dl)); } } @@ -4329,8 +4328,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // lo part. unsigned LoSize = Lo.getValueSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getShiftAmountConstant(LoSize - 1, NVT, dl)); } else if (ExtType == ISD::ZEXTLOAD) { // The high part is just a zero. Hi = DAG.getConstant(0, dl, NVT); @@ -4391,13 +4389,12 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getNode( ISD::OR, dl, NVT, Lo, DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(ExcessBits, dl, - TLI.getPointerTy(DAG.getDataLayout())))); + DAG.getShiftAmountConstant(ExcessBits, NVT, dl))); // Move high bits to the right position in Hi. Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? 
ISD::SRA : ISD::SRL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getShiftAmountConstant( + NVT.getSizeInBits() - ExcessBits, NVT, dl)); } } @@ -5165,12 +5162,12 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); SDLoc dl(N); - Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); - Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), - N->getOperand(0), - DAG.getConstant(NVT.getSizeInBits(), dl, - TLI.getPointerTy(DAG.getDataLayout()))); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, InOp); + Hi = DAG.getNode(ISD::SRL, dl, InVT, InOp, + DAG.getShiftAmountConstant(NVT.getSizeInBits(), InVT, dl)); Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } @@ -5928,14 +5925,13 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. - Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode( + ISD::SHL, dl, NVT, Hi, + DAG.getShiftAmountConstant(NVT.getSizeInBits() - ExcessBits, NVT, dl)); Hi = DAG.getNode( ISD::OR, dl, NVT, Hi, DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, dl, - TLI.getPointerTy(DAG.getDataLayout())))); + DAG.getShiftAmountConstant(ExcessBits, NVT, dl))); } // Store both the high bits and maybe some of the low bits. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index cc0fd7993916c..f14eeda639e71 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1001,11 +1001,10 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); - EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, - DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT)); + DAG.getShiftAmountConstant(LVT.getSizeInBits(), NVT, dlHi)); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 118fd8418f787..ff7cd665446cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5945,10 +5945,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // interesting bits will end up at the wrong place. 
if (DAG.getDataLayout().isBigEndian()) { unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); - EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout()); - assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!"); NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp, - DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + DAG.getShiftAmountConstant(ShiftAmt, NInVT, dl)); } return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp); } From 261000760fba7ab353962fbc1a74c194acd3e097 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 12 Sep 2025 20:03:36 -0700 Subject: [PATCH 210/734] [lldb/docs] Breakdown python reference into multiple files (#158331) This pages improve the LLDB website documentation readability and discoverability by breaking down the very long python-reference page into multiple subpages each explaining a specific topic. The long term goal is to have tutorials for every scripting extension. This also converts the pages to markdown, since it's easier to write. 
Signed-off-by: Med Ismail Bennani --- lldb/docs/.htaccess | 1 + lldb/docs/use/python-reference.rst | 1141 +---------------- lldb/docs/use/python.rst | 799 ------------ .../use/tutorials/accessing-documentation.md | 62 + .../tutorials/automating-stepping-logic.md | 42 + .../tutorials/breakpoint-triggered-scripts.md | 85 ++ .../tutorials/creating-custom-breakpoints.md | 128 ++ .../use/tutorials/custom-frame-recognizers.md | 51 + .../tutorials/extending-target-stop-hooks.md | 25 + .../implementing-standalone-scripts.md | 134 ++ .../tutorials/python-embedded-interpreter.md | 66 + .../use/tutorials/script-driven-debugging.md | 492 +++++++ .../use/tutorials/writing-custom-commands.md | 429 +++++++ 13 files changed, 1533 insertions(+), 1922 deletions(-) delete mode 100644 lldb/docs/use/python.rst create mode 100644 lldb/docs/use/tutorials/accessing-documentation.md create mode 100644 lldb/docs/use/tutorials/automating-stepping-logic.md create mode 100644 lldb/docs/use/tutorials/breakpoint-triggered-scripts.md create mode 100644 lldb/docs/use/tutorials/creating-custom-breakpoints.md create mode 100644 lldb/docs/use/tutorials/custom-frame-recognizers.md create mode 100644 lldb/docs/use/tutorials/extending-target-stop-hooks.md create mode 100644 lldb/docs/use/tutorials/implementing-standalone-scripts.md create mode 100644 lldb/docs/use/tutorials/python-embedded-interpreter.md create mode 100644 lldb/docs/use/tutorials/script-driven-debugging.md create mode 100644 lldb/docs/use/tutorials/writing-custom-commands.md diff --git a/lldb/docs/.htaccess b/lldb/docs/.htaccess index f094bd6ebc783..34e7fcb8f5516 100644 --- a/lldb/docs/.htaccess +++ b/lldb/docs/.htaccess @@ -19,6 +19,7 @@ Redirect 301 /resources/architecture.html https://lldb.llvm.org/resources/overvi Redirect 301 /design/sbapi.html https://lldb.llvm.org/resources/sbapi.html Redirect 301 /design/overview.html https://lldb.llvm.org/resources/overview.html Redirect 301 /use/extensions.html 
https://lldb.llvm.org/resources/extensions.html +Redirect 301 /use/python.html https://lldb.llvm.org/use/tutorials/script-driven-debugging.html Redirect 301 /resources/bots.html https://lldb.llvm.org/resources/test.html # Redirect old Python API to new Python API. diff --git a/lldb/docs/use/python-reference.rst b/lldb/docs/use/python-reference.rst index 4292714c9c208..6ac2ec93fbd1f 100644 --- a/lldb/docs/use/python-reference.rst +++ b/lldb/docs/use/python-reference.rst @@ -10,1126 +10,21 @@ command interpreter (we refer to this for brevity as the embedded interpreter). Of course, in this context it has full access to the LLDB API - with some additional conveniences we will call out in the FAQ. -Documentation --------------- - -The LLDB API is contained in a python module named lldb. A useful resource when -writing Python extensions is the lldb Python classes reference guide. - -The documentation is also accessible in an interactive debugger session with -the following command: - -:: - - (lldb) script help(lldb) - Help on package lldb: - - NAME - lldb - The lldb module contains the public APIs for Python binding. - - FILE - /System/Library/PrivateFrameworks/LLDB.framework/Versions/A/Resources/Python/lldb/__init__.py - - DESCRIPTION - ... - -You can also get help using a module class name. The full API that is exposed -for that class will be displayed in a man page style window. Below we want to -get help on the lldb.SBFrame class: - -:: - - (lldb) script help(lldb.SBFrame) - Help on class SBFrame in module lldb: - - class SBFrame(__builtin__.object) - | Represents one of the stack frames associated with a thread. - | SBThread contains SBFrame(s). For example (from test/lldbutil.py), - | - | def print_stacktrace(thread, string_buffer = False): - | '''Prints a simple stack trace of this thread.''' - | - ... 
- -Or you can get help using any python object, here we use the lldb.process -object which is a global variable in the lldb module which represents the -currently selected process: - -:: - - (lldb) script help(lldb.process) - Help on SBProcess in module lldb object: - - class SBProcess(__builtin__.object) - | Represents the process associated with the target program. - | - | SBProcess supports thread iteration. For example (from test/lldbutil.py), - | - | # ================================================== - | # Utility functions related to Threads and Processes - | # ================================================== - | - ... - -Embedded Python Interpreter ---------------------------- - -The embedded python interpreter can be accessed in a variety of ways from -within LLDB. The easiest way is to use the lldb command script with no -arguments at the lldb command prompt: - -:: - - (lldb) script - Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. - >>> 2+3 - 5 - >>> hex(12345) - '0x3039' - >>> - -This drops you into the embedded python interpreter. When running under the -script command, lldb sets some convenience variables that give you quick access -to the currently selected entities that characterize the program and debugger -state. In each case, if there is no currently selected entity of the -appropriate type, the variable's IsValid method will return false. These -variables are: - -+-------------------+---------------------+-------------------------------------+-------------------------------------------------------------------------------------+ -| Variable | Type | Equivalent | Description | -+-------------------+---------------------+-------------------------------------+-------------------------------------------------------------------------------------+ -| ``lldb.debugger`` | `lldb.SBDebugger` | `SBTarget.GetDebugger` | Contains the debugger object whose ``script`` command was invoked. 
| -| | | | The `lldb.SBDebugger` object owns the command interpreter | -| | | | and all the targets in your debug session. There will always be a | -| | | | Debugger in the embedded interpreter. | -+-------------------+---------------------+-------------------------------------+-------------------------------------------------------------------------------------+ -| ``lldb.target`` | `lldb.SBTarget` | `SBDebugger.GetSelectedTarget` | Contains the currently selected target - for instance the one made with the | -| | | | ``file`` or selected by the ``target select `` command. | -| | | `SBProcess.GetTarget` | The `lldb.SBTarget` manages one running process, and all the executable | -| | | | and debug files for the process. | -+-------------------+---------------------+-------------------------------------+-------------------------------------------------------------------------------------+ -| ``lldb.process`` | `lldb.SBProcess` | `SBTarget.GetProcess` | Contains the process of the currently selected target. | -| | | | The `lldb.SBProcess` object manages the threads and allows access to | -| | | `SBThread.GetProcess` | memory for the process. | -+-------------------+---------------------+-------------------------------------+-------------------------------------------------------------------------------------+ -| ``lldb.thread`` | `lldb.SBThread` | `SBProcess.GetSelectedThread` | Contains the currently selected thread. | -| | | | The `lldb.SBThread` object manages the stack frames in that thread. | -| | | `SBFrame.GetThread` | A thread is always selected in the command interpreter when a target stops. | -| | | | The ``thread select `` command can be used to change the | -| | | | currently selected thread. So as long as you have a stopped process, there will be | -| | | | some selected thread. 
| -+-------------------+---------------------+-------------------------------------+-------------------------------------------------------------------------------------+ -| ``lldb.frame`` | `lldb.SBFrame` | `SBThread.GetSelectedFrame` | Contains the currently selected stack frame. | -| | | | The `lldb.SBFrame` object manage the stack locals and the register set for | -| | | | that stack. | -| | | | A stack frame is always selected in the command interpreter when a target stops. | -| | | | The ``frame select `` command can be used to change the | -| | | | currently selected frame. So as long as you have a stopped process, there will | -| | | | be some selected frame. | -+-------------------+---------------------+-------------------------------------+-------------------------------------------------------------------------------------+ - -While extremely convenient, these variables have a couple caveats that you -should be aware of. First of all, they hold the values of the selected objects -on entry to the embedded interpreter. They do not update as you use the LLDB -API's to change, for example, the currently selected stack frame or thread. - -Moreover, they are only defined and meaningful while in the interactive Python -interpreter. There is no guarantee on their value in any other situation, hence -you should not use them when defining Python formatters, breakpoint scripts and -commands (or any other Python extension point that LLDB provides). For the -latter you'll be passed an `SBDebugger`, `SBTarget`, `SBProcess`, `SBThread` or -`SBFrame` instance and you can use the functions from the "Equivalent" column -to navigate between them. - -As a rationale for such behavior, consider that lldb can run in a multithreaded -environment, and another thread might call the "script" command, changing the -value out from under you. 
- -To get started with these objects and LLDB scripting, please note that almost -all of the lldb Python objects are able to briefly describe themselves when you -pass them to the Python print function: - -:: - - (lldb) script - Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. - >>> print(lldb.debugger) - Debugger (instance: "debugger_1", id: 1) - >>> print(lldb.target) - a.out - >>> print(lldb.process) - SBProcess: pid = 58842, state = stopped, threads = 1, executable = a.out - >>> print(lldb.thread) - thread #1: tid = 0x2265ce3, 0x0000000100000334 a.out`main at t.c:2:3, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1 - >>> print(lldb.frame) - frame #0: 0x0000000100000334 a.out`main at t.c:2:3 - - -Running a python script when a breakpoint gets hit --------------------------------------------------- - -One very powerful use of the lldb Python API is to have a python script run -when a breakpoint gets hit. Adding python scripts to breakpoints provides a way -to create complex breakpoint conditions and also allows for smart logging and -data gathering. - -When your process hits a breakpoint to which you have attached some python -code, the code is executed as the body of a function which takes three -arguments: - -:: - - def breakpoint_function_wrapper(frame, bp_loc, internal_dict): - # Your code goes here - -or: - -:: - - def breakpoint_function_wrapper(frame, bp_loc, extra_args, internal_dict): - # Your code goes here - - -+-------------------+-------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Argument | Type | Description | -+-------------------+-------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| ``frame`` | `lldb.SBFrame` | The current stack frame where the breakpoint got hit. 
| -| | | The object will always be valid. | -| | | This ``frame`` argument might *not* match the currently selected stack frame found in the `lldb` module global variable ``lldb.frame``. | -+-------------------+-------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| ``bp_loc`` | `lldb.SBBreakpointLocation` | The breakpoint location that just got hit. Breakpoints are represented by `lldb.SBBreakpoint` | -| | | objects. These breakpoint objects can have one or more locations. These locations | -| | | are represented by `lldb.SBBreakpointLocation` objects. | -+-------------------+-------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| ``extra_args`` | `lldb.SBStructuredData` | ``Optional`` If your breakpoint callback function takes this extra parameter, then when the callback gets added to a breakpoint, its | -| | | contents can parametrize this use of the callback. For instance, instead of writing a callback that stops when the caller is "Foo", | -| | | you could take the function name from a field in the ``extra_args``, making the callback more general. The ``-k`` and ``-v`` options | -| | | to ``breakpoint command add`` will be passed as a Dictionary in the ``extra_args`` parameter, or you can provide it with the SB API's. | -+-------------------+-------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| ``internal_dict`` | ``dict`` | The python session dictionary as a standard python dictionary object. 
| -+-------------------+-------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ - -Optionally, a Python breakpoint command can return a value. Returning False -tells LLDB that you do not want to stop at the breakpoint. Any other return -value (including None or leaving out the return statement altogether) is akin -to telling LLDB to actually stop at the breakpoint. This can be useful in -situations where a breakpoint only needs to stop the process when certain -conditions are met, and you do not want to inspect the program state manually -at every stop and then continue. - -An example will show how simple it is to write some python code and attach it -to a breakpoint. The following example will allow you to track the order in -which the functions in a given shared library are first executed during one run -of your program. This is a simple method to gather an order file which can be -used to optimize function placement within a binary for execution locality. - -We do this by setting a regular expression breakpoint that will match every -function in the shared library. The regular expression '.' will match any -string that has at least one character in it, so we will use that. This will -result in one lldb.SBBreakpoint object that contains an -lldb.SBBreakpointLocation object for each function. As the breakpoint gets hit, -we use a counter to track the order in which the function at this particular -breakpoint location got hit. Since our code is passed the location that was -hit, we can get the name of the function from the location, disable the -location so we won't count this function again; then log some info and continue -the process. - -Note we also have to initialize our counter, which we do with the simple -one-line version of the script command. - -Here is the code: - -:: - - (lldb) breakpoint set --func-regex=. 
--shlib=libfoo.dylib - Breakpoint created: 1: regex = '.', module = libfoo.dylib, locations = 223 - (lldb) script counter = 0 - (lldb) breakpoint command add --script-type python 1 - Enter your Python command(s). Type 'DONE' to end. - > # Increment our counter. Since we are in a function, this must be a global python variable - > global counter - > counter += 1 - > # Get the name of the function - > name = frame.GetFunctionName() - > # Print the order and the function name - > print('[%i] %s' % (counter, name)) - > # Disable the current breakpoint location so it doesn't get hit again - > bp_loc.SetEnabled(False) - > # No need to stop here - > return False - > DONE - -The breakpoint command add command above attaches a python script to breakpoint 1. To remove the breakpoint command: - -:: - - (lldb) breakpoint command delete 1 - - -Using the python api's to create custom breakpoints ---------------------------------------------------- - - -Another use of the Python API's in lldb is to create a custom breakpoint -resolver. This facility was added in r342259. - -It allows you to provide the algorithm which will be used in the breakpoint's -search of the space of the code in a given Target to determine where to set the -breakpoint locations - the actual places where the breakpoint will trigger. To -understand how this works you need to know a little about how lldb handles -breakpoints. - -In lldb, a breakpoint is composed of three parts: the Searcher, the Resolver, -and the Stop Options. The Searcher and Resolver cooperate to determine how -breakpoint locations are set and differ between each breakpoint type. Stop -options determine what happens when a location triggers and includes the -commands, conditions, ignore counts, etc. Stop options are common between all -breakpoint types, so for our purposes only the Searcher and Resolver are -relevant. - -The Searcher's job is to traverse in a structured way the code in the current -target. 
It proceeds from the Target, to search all the Modules in the Target, -in each Module it can recurse into the Compile Units in that module, and within -each Compile Unit it can recurse over the Functions it contains. - -The Searcher can be provided with a SearchFilter that it will use to restrict -this search. For instance, if the SearchFilter specifies a list of Modules, the -Searcher will not recurse into Modules that aren't on the list. When you pass -the -s modulename flag to break set you are creating a Module-based search -filter. When you pass -f filename.c to break set -n you are creating a file -based search filter. If neither of these is specified, the breakpoint will have -a no-op search filter, so all parts of the program are searched and all -locations accepted. - -The Resolver has two functions. The most important one is the callback it -provides. This will get called at the appropriate time in the course of the -search. The callback is where the job of adding locations to the breakpoint -gets done. - -The other function is specifying to the Searcher at what depth in the above -described recursion it wants to be called. Setting a search depth also provides -a stop for the recursion. For instance, if you request a Module depth search, -then the callback will be called for each Module as it gets added to the -Target, but the searcher will not recurse into the Compile Units in the module. - -One other slight subtlety is that the depth at which you get called back is not -necessarily the depth at which the SearchFilter is specified. For instance, -if you are doing symbol searches, it is convenient to use the Module depth for -the search, since symbols are stored in the module. But the SearchFilter might -specify some subset of CompileUnits, so not all the symbols you might find in -each module will pass the search. You don't need to handle this situation -yourself, since SBBreakpoint::AddLocation will only add locations that pass the -Search Filter. 
This API returns an SBError to inform you whether your location -was added. - -When the breakpoint is originally created, its Searcher will process all the -currently loaded modules. The Searcher will also visit any new modules as they -are added to the target. This happens, for instance, when a new shared library -gets added to the target in the course of running, or on rerunning if any of -the currently loaded modules have been changed. Note, in the latter case, all -the locations set in the old module will get deleted and you will be asked to -recreate them in the new version of the module when your callback gets called -with that module. For this reason, you shouldn't try to manage the locations -you add to the breakpoint yourself. Note that the Breakpoint takes care of -deduplicating equal addresses in AddLocation, so you shouldn't need to worry -about that anyway. - -At present, when adding a scripted Breakpoint type, you can only provide a -custom Resolver, not a custom SearchFilter. - -The custom Resolver is provided as a Python class with the following methods: - -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| Name | Arguments | Description | -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| ``__init__`` | ``bkpt``:`lldb.SBBreakpoint` | This is the constructor for the new Resolver. | -| | ``extra_args``:`lldb.SBStructuredData`| | -| | | | -| | | ``bkpt`` is the breakpoint owning this Resolver. | -| | | | -| | | | -| | | ``extra_args`` is an `SBStructuredData` object that the user can pass in when creating instances of this | -| | | breakpoint. It is not required, but is quite handy. 
For instance if you were implementing a breakpoint on some | -| | | symbol name, you could write a generic symbol name based Resolver, and then allow the user to pass | -| | | in the particular symbol in the extra_args | -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| ``__callback__`` | ``sym_ctx``:`lldb.SBSymbolContext` | This is the Resolver callback. | -| | | The ``sym_ctx`` argument will be filled with the current stage | -| | | of the search. | -| | | | -| | | | -| | | For instance, if you asked for a search depth of lldb.eSearchDepthCompUnit, then the | -| | | target, module and compile_unit fields of the sym_ctx will be filled. The callback should look just in the | -| | | context passed in ``sym_ctx`` for new locations. If the callback finds an address of interest, it | -| | | can add it to the breakpoint with the `SBBreakpoint.AddLocation` method, using the breakpoint passed | -| | | in to the ``__init__`` method. | -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| ``__get_depth__`` | ``None`` | Specify the depth at which you wish your callback to get called. The currently supported options are: | -| | | | -| | | `lldb.eSearchDepthModule` | -| | | `lldb.eSearchDepthCompUnit` | -| | | `lldb.eSearchDepthFunction` | -| | | | -| | | For instance, if you are looking | -| | | up symbols, which are stored at the Module level, you will want to get called back module by module. | -| | | So you would want to return `lldb.eSearchDepthModule`. This method is optional. If not provided the search | -| | | will be done at Module depth. 
| -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| ``get_short_help`` | ``None`` | This is an optional method. If provided, the returned string will be printed at the beginning of | -| | | the description for this breakpoint. | -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ - -To define a new breakpoint command defined by this class from the lldb command -line, use the command: - -:: - - (lldb) breakpoint set -P MyModule.MyResolverClass - -You can also populate the extra_args SBStructuredData with a dictionary of -key/value pairs with: - -:: - - (lldb) breakpoint set -P MyModule.MyResolverClass -k key_1 -v value_1 -k key_2 -v value_2 - -Although you can't write a scripted SearchFilter, both the command line and the -SB API's for adding a scripted resolver allow you to specify a SearchFilter -restricted to certain modules or certain compile units. When using the command -line to create the resolver, you can specify a Module specific SearchFilter by -passing the -s ModuleName option - which can be specified multiple times. You -can also specify a SearchFilter restricted to certain compile units by passing -in the -f CompUnitName option. This can also be specified more than once. And -you can mix the two to specify "this comp unit in this module". So, for -instance, - -:: - - (lldb) breakpoint set -P MyModule.MyResolverClass -s a.out - -will use your resolver, but will only recurse into or accept new locations in -the module a.out. - -Another option for creating scripted breakpoints is to use the -SBTarget.BreakpointCreateFromScript API. This one has the advantage that you -can pass in an arbitrary SBStructuredData object, so you can create more -complex parametrizations. 
SBStructuredData has a handy SetFromJSON method which -you can use for this purpose. Your __init__ function gets passed this -SBStructuredData object. This API also allows you to directly provide the list -of Modules and the list of CompileUnits that will make up the SearchFilter. If -you pass in empty lists, the breakpoint will use the default "search -everywhere,accept everything" filter. - -Using the python API' to create custom stepping logic ------------------------------------------------------ - -A slightly esoteric use of the Python API's is to construct custom stepping -types. LLDB's stepping is driven by a stack of "thread plans" and a fairly -simple state machine that runs the plans. You can create a Python class that -works as a thread plan, and responds to the requests the state machine makes to -run its operations. - -There is a longer discussion of scripted thread plans and the state machine, -and several interesting examples of their use in: - -https://github.com/llvm/llvm-project/blob/main/lldb/examples/python/scripted_step.py - -And for a MUCH fuller discussion of the whole state machine, see: - -https://github.com/llvm/llvm-project/blob/main/lldb/include/lldb/Target/ThreadPlan.h - -If you are reading those comments it is useful to know that scripted thread -plans are set to be "ControllingPlans", and not "OkayToDiscard". - -To implement a scripted step, you define a python class that has the following -methods: - -+-------------------+------------------------------------+---------------------------------------------------------------------------------------+ -| Name | Arguments | Description | -+-------------------+------------------------------------+---------------------------------------------------------------------------------------+ -| ``__init__`` | ``thread_plan``:`lldb.SBThreadPlan`| This is the underlying `SBThreadPlan` that is pushed onto the plan stack. | -| | | You will want to store this away in an ivar. 
Also, if you are going to | -| | | use one of the canned thread plans, you can queue it at this point. | -+-------------------+------------------------------------+---------------------------------------------------------------------------------------+ -| ``explains_stop`` | ``event``: `lldb.SBEvent` | Return True if this stop is part of your thread plans logic, false otherwise. | -+-------------------+------------------------------------+---------------------------------------------------------------------------------------+ -| ``is_stale`` | ``None`` | If your plan is no longer relevant (for instance, you were | -| | | stepping in a particular stack frame, but some other operation | -| | | pushed that frame off the stack) return True and your plan will | -| | | get popped. | -+-------------------+------------------------------------+---------------------------------------------------------------------------------------+ -| ``should_step`` | ``None`` | Return ``True`` if you want lldb to instruction step one instruction, | -| | | or False to continue till the next breakpoint is hit. | -+-------------------+------------------------------------+---------------------------------------------------------------------------------------+ -| ``should_stop`` | ``event``: `lldb.SBEvent` | If your plan wants to stop and return control to the user at this point, return True. | -| | | If your plan is done at this point, call SetPlanComplete on your | -| | | thread plan instance. | -| | | Also, do any work you need here to set up the next stage of stepping. | -+-------------------+------------------------------------+---------------------------------------------------------------------------------------+ - -To use this class to implement a step, use the command: - -:: - - (lldb) thread step-scripted -C MyModule.MyStepPlanClass - -Or use the SBThread.StepUsingScriptedThreadPlan API. 
The SBThreadPlan passed -into your __init__ function can also push several common plans (step -in/out/over and run-to-address) in front of itself on the stack, which can be -used to compose more complex stepping operations. When you use subsidiary plans -your explains_stop and should_stop methods won't get called until the -subsidiary plan is done, or the process stops for an event the subsidiary plan -doesn't explain. For instance, step over plans don't explain a breakpoint hit -while performing the step-over. - - -Create a new lldb command using a Python function -------------------------------------------------- - -Python functions can be used to create new LLDB command interpreter commands, -which will work like all the natively defined lldb commands. This provides a -very flexible and easy way to extend LLDB to meet your debugging requirements. - -To write a python function that implements a new LLDB command define the -function to take five arguments as follows: - -:: - - def command_function(debugger, command, exe_ctx, result, internal_dict): - # Your code goes here - -The meaning of the arguments is given in the table below. - -If you provide a Python docstring in your command function LLDB will use it -when providing "long help" for your command, as in: - -:: - - def command_function(debugger, command, result, internal_dict): - """This command takes a lot of options and does many fancy things""" - # Your code goes here - -though providing help can also be done programmatically (see below). - -Prior to lldb 3.5.2 (April 2015), LLDB Python command definitions didn't take the SBExecutionContext -argument. So you may still see commands where the command definition is: - -:: - - def command_function(debugger, command, result, internal_dict): - # Your code goes here - -Using this form is strongly discouraged because it can only operate on the "currently selected" -target, process, thread, frame. 
The command will behave as expected when run -directly on the command line. But if the command is used in a stop-hook, breakpoint -callback, etc. where the response to the callback determines whether we will select -this or that particular process/frame/thread, the global "currently selected" -entity is not necessarily the one the callback is meant to handle. In that case, this -command definition form can't do the right thing. - -+-------------------+--------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ -| Argument | Type | Description | -+-------------------+--------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ -| ``debugger`` | `lldb.SBDebugger` | The current debugger object. | -+-------------------+--------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ -| ``command`` | ``python string`` | A python string containing all arguments for your command. If you need to chop up the arguments | -| | | try using the ``shlex`` module's ``shlex.split(command)`` to properly extract the | -| | | arguments. 
| -+-------------------+--------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ -| ``exe_ctx`` | `lldb.SBExecutionContext` | An execution context object carrying around information on the inferior process' context in which the command is expected to act | -| | | | -| | | *Optional since lldb 3.5.2, unavailable before* | -+-------------------+--------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ -| ``result`` | `lldb.SBCommandReturnObject` | A return object which encapsulates success/failure information for the command and output text | -| | | that needs to be printed as a result of the command. The plain Python "print" command also works but | -| | | text won't go in the result by default (it is useful as a temporary logging facility). | -+-------------------+--------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ -| ``internal_dict`` | ``python dict object`` | The dictionary for the current embedded script session which contains all variables | -| | | and functions. | -+-------------------+--------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - -Since lldb 3.7, Python commands can also be implemented by means of a class -which should implement the following interface: - -.. 
code-block:: python - - class CommandObjectType: - def __init__(self, debugger, internal_dict): - this call should initialize the command with respect to the command interpreter for the passed-in debugger - def __call__(self, debugger, command, exe_ctx, result): - this is the actual bulk of the command, akin to Python command functions - def get_short_help(self): - this call should return the short help text for this command[1] - def get_long_help(self): - this call should return the long help text for this command[1] - def get_flags(self): - this will be called when the command is added to the command interpreter, - and should return a flag field made from or-ing together the appropriate - elements of the lldb.CommandFlags enum to specify the requirements of this command. - The CommandInterpreter will make sure all these requirements are met, and will - return the standard lldb error if they are not.[1] - def get_repeat_command(self, command): - The auto-repeat command is what will get executed when the user types just - a return at the next prompt after this command is run. Even if your command - was run because it was specified as a repeat command, that invocation will still - get asked for IT'S repeat command, so you can chain a series of repeats, for instance - to implement a pager. - - The command argument is the command that is about to be executed. - - If this call returns None, then the ordinary repeat mechanism will be used - If this call returns an empty string, then auto-repeat is disabled - If this call returns any other string, that will be the repeat command [1] - -[1] This method is optional. - -As a convenience, you can treat the result object as a Python file object, and -say - -.. code-block:: python - - print("my command does lots of cool stuff", file=result) - -SBCommandReturnObject and SBStream both support this file-like behavior by -providing write() and flush() calls at the Python layer. 
- -The commands that are added using this class definition are what lldb calls -"raw" commands. The command interpreter doesn't attempt to parse the command, -doesn't handle option values, neither generating help for them, or their -completion. Raw commands are useful when the arguments passed to the command -are unstructured, and having to protect them against lldb command parsing would -be onerous. For instance, "expr" is a raw command. - -You can also add scripted commands that implement the "parsed command", where -the options and their types are specified, as well as the argument and argument -types. These commands look and act like the majority of lldb commands, and you -can also add custom completions for the options and/or the arguments if you have -special needs. - -The easiest way to do this is to derive your new command from the lldb.ParsedCommand -class. That responds in the same way to the help & repeat command interfaces, and -provides some convenience methods, and most importantly an LLDBOptionValueParser, -accessed through lldb.ParsedCommand.get_parser(). The parser is used to set -your command definitions, and to retrieve option values in the __call__ method. - -To set up the command definition, implement the ParsedCommand abstract method: - -.. code-block:: python - - def setup_command_definition(self): - -This is called when your command is added to lldb. In this method you add the -options and their types, the option help strings, etc. to the command using the API: - -.. 
code-block:: python - - def add_option(self, short_option, long_option, help, default, - dest = None, required=False, groups = None, - value_type=lldb.eArgTypeNone, completion_type=None, - enum_values=None): - """ - short_option: one character, must be unique, not required - long_option: no spaces, must be unique, required - help: a usage string for this option, will print in the command help - default: the initial value for this option (if it has a value) - dest: the name of the property that gives you access to the value for - this value. Defaults to the long option if not provided. - required: if true, this option must be provided or the command will error out - groups: Which "option groups" does this option belong to. This can either be - a simple list (e.g. [1, 3, 4, 5]) or you can specify ranges by sublists: - so [1, [3,5]] is the same as [1, 3, 4, 5]. - value_type: one of the lldb.eArgType enum values. Some of the common arg - types also have default completers, which will be applied automatically. - completion_type: currently these are values form the lldb.CompletionType enum. If - you need custom completions, implement handle_option_argument_completion. - enum_values: An array of duples: ["element_name", "element_help"]. If provided, - only one of the enum elements is allowed. The value will be the - element_name for the chosen enum element as a string. - """ - -Similarly, you can add argument types to the command: - -.. code-block:: python - - def make_argument_element(self, arg_type, repeat = "optional", groups = None): - """ - arg_type: The argument type, one of the lldb.eArgType enum values. - repeat: Choose from the following options: - "plain" - one value - "optional" - zero or more values - "plus" - one or more values - groups: As with add_option. - """ - -Then implement the body of the command by defining: - -.. code-block:: python - - def __call__(self, debugger, args_array, exe_ctx, result): - """This is the command callback. 
The option values are - provided by the 'dest' properties on the parser. - - args_array: This is the list of arguments provided. - exe_ctx: Gives the SBExecutionContext on which the - command should operate. - result: Any results of the command should be - written into this SBCommandReturnObject. - """ - -This differs from the "raw" command's __call__ in that the arguments are already -parsed into the args_array, and the option values are set in the parser, and -can be accessed using their property name. The LLDBOptionValueParser class has -a couple of other handy methods: - -.. code-block:: python - def was_set(self, long_option_name): - -returns True if the option was specified on the command line. - -.. code-block:: python - - def dest_for_option(self, long_option_name): - """ - This will return the value of the dest variable you defined for opt_name. - Mostly useful for handle_completion where you get passed the long option. - """ - -lldb will handle completing your option names, and all your enum values -automatically. If your option or argument types have associated built-in completers, -then lldb will also handle that completion for you. But if you have a need for -custom completions, either in your arguments or option values, you can handle -completion by hand as well. To handle completion of option value arguments, -your lldb.ParsedCommand subclass should implement: - -.. code-block:: python - - def handle_option_argument_completion(self, long_option, cursor_pos): - """ - long_option: The long option name of the option whose value you are - asked to complete. - cursor_pos: The cursor position in the value for that option - which - you can get from the option parser. - """ - -And to handle the completion of arguments: - -.. 
code-block:: python - - def handle_argument_completion(self, args, arg_pos, cursor_pos): - """ - args: A list of the arguments to the command - arg_pos: An index into the args list of the argument with the cursor - cursor_pos: The cursor position in the arg specified by arg_pos - """ - -When either of these API's is called, the command line will have been parsed up to -the word containing the cursor, and any option values set in that part of the command -string are available from the option value parser. That's useful for instance -if you have a --shared-library option that would constrain the completions for, -say, a symbol name option or argument. - -The return value specifies what the completion options are. You have four -choices: - -- `True`: the completion was handled with no completions. - -- `False`: the completion was not handled, forward it to the regular -completion machinery. - -- A dictionary with the key: "completion": there is one candidate, -whose value is the value of the "completion" key. Optionally you can pass a -"mode" key whose value is either "partial" or "complete". Return partial if -the "completion" string is a prefix for all the completed value. - -For instance, if the string you are completing is "Test" and the available completions are: -"Test1", "Test11" and "Test111", you should return the dictionary: - -.. code-block:: python - - return {"completion": "Test1", "mode" : "partial"} - -and then lldb will add the "1" at the cursor and advance it after the added string, -waiting for more completions. But if "Test1" is the only completion, return: - -.. code-block:: python - - {"completion": "Test1", "mode": "complete"} - -and lldb will add "1 " at the cursor, indicating the command string is complete. - -The default is "complete", you don't need to specify a "mode" in that case. - -- A dictionary with the key: "values" whose value is a list of candidate completion -strings. 
The command interpreter will present those strings as the available choices. -You can optionally include a "descriptions" key, whose value is a parallel array -of description strings, and the completion will show the description next to -each completion. - - -One other handy convenience when defining lldb command-line commands is the -command "command script import" which will import a module specified by file -path, so you don't have to change your PYTHONPATH for temporary scripts. It -also has another convenience that if your new script module has a function of -the form: - -.. code-block python - - def __lldb_init_module(debugger, internal_dict): - # Command Initialization code goes here - -where debugger and internal_dict are as above, that function will get run when -the module is loaded allowing you to add whatever commands you want into the -current debugger. Note that this function will only be run when using the LLDB -command ``command script import``, it will not get run if anyone imports your -module from another module. - -The standard test for ``__main__``, like many python modules do, is useful for -creating scripts that can be run from the command line. However, for command -line scripts, the debugger instance must be created manually. Sample code would -look like: - -.. code-block:: python - - if __name__ == '__main__': - # Initialize the debugger before making any API calls. - lldb.SBDebugger.Initialize() - # Create a new debugger instance in your module if your module - # can be run from the command line. When we run a script from - # the command line, we won't have any debugger object in - # lldb.debugger, so we can just create it if it will be needed - debugger = lldb.SBDebugger.Create() - - # Next, do whatever work this module should do when run as a command. - # ... - - # Finally, dispose of the debugger you just made. 
- lldb.SBDebugger.Destroy(debugger) - # Terminate the debug session - lldb.SBDebugger.Terminate() - - -Now we can create a module called ls.py in the file ~/ls.py that will implement -a function that can be used by LLDB's python command code: - -.. code-block:: python - - #!/usr/bin/env python - - import lldb - import commands - import optparse - import shlex - - def ls(debugger, command, result, internal_dict): - print >>result, (commands.getoutput('/bin/ls %s' % command)) - - # And the initialization code to add your commands - def __lldb_init_module(debugger, internal_dict): - debugger.HandleCommand('command script add -f ls.ls ls') - print('The "ls" python command has been installed and is ready for use.') - -Now we can load the module into LLDB and use it - -:: - - $ lldb - (lldb) command script import ~/ls.py - The "ls" python command has been installed and is ready for use. - (lldb) ls -l /tmp/ - total 365848 - -rw-r--r--@ 1 someuser wheel 6148 Jan 19 17:27 .DS_Store - -rw------- 1 someuser wheel 7331 Jan 19 15:37 crash.log - -You can also make "container" commands to organize the commands you are adding to -lldb. Most of the lldb built-in commands structure themselves this way, and using -a tree structure has the benefit of leaving the one-word command space free for user -aliases. It can also make it easier to find commands if you are adding more than -a few of them. 
Here's a trivial example of adding two "utility" commands into a -"my-utilities" container: - -:: - - #!/usr/bin/env python - - import lldb - - def first_utility(debugger, command, result, internal_dict): - print("I am the first utility") - - def second_utility(debugger, command, result, internal_dict): - print("I am the second utility") - - # And the initialization code to add your commands - def __lldb_init_module(debugger, internal_dict): - debugger.HandleCommand('command container add -h "A container for my utilities" my-utilities') - debugger.HandleCommand('command script add -f my_utilities.first_utility -h "My first utility" my-utilities first') - debugger.HandleCommand('command script add -f my_utilities.second_utility -h "My second utility" my-utilities second') - print('The "my-utilities" python command has been installed and its subcommands are ready for use.') - -Then your new commands are available under the my-utilities node: - -:: - - (lldb) help my-utilities - A container for my utilities - - Syntax: my-utilities - - The following subcommands are supported: - - first -- My first utility Expects 'raw' input (see 'help raw-input'.) - second -- My second utility Expects 'raw' input (see 'help raw-input'.) - - For more help on any particular subcommand, type 'help '. - (lldb) my-utilities first - I am the first utility - - -A more interesting template has been created in the source repository that can -help you to create lldb command quickly: - -https://github.com/llvm/llvm-project/blob/main/lldb/examples/python/cmdtemplate.py - -A commonly required facility is being able to create a command that does some -token substitution, and then runs a different debugger command (usually, it -po'es the result of an expression evaluated on its argument). 
For instance, -given the following program: - -:: - - #import - NSString* - ModifyString(NSString* src) - { - return [src stringByAppendingString:@"foobar"]; - } - - int main() - { - NSString* aString = @"Hello world"; - NSString* anotherString = @"Let's be friends"; - return 1; - } - -you may want a pofoo X command, that equates po [ModifyString(X) -capitalizedString]. The following debugger interaction shows how to achieve -that goal: - -:: - - (lldb) script - Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. - >>> def pofoo_funct(debugger, command, result, internal_dict): - ... cmd = "po [ModifyString(" + command + ") capitalizedString]" - ... debugger.HandleCommand(cmd) - ... - >>> ^D - (lldb) command script add pofoo -f pofoo_funct - (lldb) pofoo aString - $1 = 0x000000010010aa00 Hello Worldfoobar - (lldb) pofoo anotherString - $2 = 0x000000010010aba0 Let's Be Friendsfoobar - -Using the lldb.py module in Python ----------------------------------- - -LLDB has all of its core code built into a shared library which gets used by -the `lldb` command line application. On macOS this shared library is a -framework: LLDB.framework and on other unix variants the program is a shared -library: lldb.so. LLDB also provides an lldb.py module that contains the -bindings from LLDB into Python. To use the LLDB.framework to create your own -stand-alone python programs, you will need to tell python where to look in -order to find this module. This is done by setting the PYTHONPATH environment -variable, adding a path to the directory that contains the lldb.py python -module. The lldb driver program has an option to report the path to the lldb -module. 
You can use that to point to correct lldb.py: - -For csh and tcsh: - -:: - - % setenv PYTHONPATH `lldb -P` - -For sh and bash: - -:: - - $ export PYTHONPATH=`lldb -P` - -Alternatively, you can append the LLDB Python directory to the sys.path list -directly in your Python code before importing the lldb module. - -Now your python scripts are ready to import the lldb module. Below is a python -script that will launch a program from the current working directory called -"a.out", set a breakpoint at "main", and then run and hit the breakpoint, and -print the process, thread and frame objects if the process stopped: - -.. code-block:: python - - #!/usr/bin/env python3 - - import lldb - import os - - - def disassemble_instructions(insts): - for i in insts: - print(i) - - - # Set the path to the executable to debug - exe = "./a.out" - - # Create a new debugger instance - debugger = lldb.SBDebugger.Create() - - # When we step or continue, don't return from the function until the process - # stops. Otherwise we would have to handle the process events ourselves which, while doable is - # a little tricky. We do this by setting the async mode to false. - debugger.SetAsync(False) - - # Create a target from a file and arch - print("Creating a target for '%s'" % exe) - - target = debugger.CreateTargetWithFileAndArch(exe, lldb.LLDB_ARCH_DEFAULT) - - if target: - # If the target is valid set a breakpoint at main - main_bp = target.BreakpointCreateByName( - "main", target.GetExecutable().GetFilename() - ) - - print(main_bp) - - # Launch the process. 
Since we specified synchronous mode, we won't return - # from this function until we hit the breakpoint at main - process = target.LaunchSimple(None, None, os.getcwd()) - - # Make sure the launch went ok - if process: - # Print some simple process info - state = process.GetState() - print(process) - if state == lldb.eStateStopped: - # Get the first thread - thread = process.GetThreadAtIndex(0) - if thread: - # Print some simple thread info - print(thread) - # Get the first frame - frame = thread.GetFrameAtIndex(0) - if frame: - # Print some simple frame info - print(frame) - function = frame.GetFunction() - # See if we have debug info (a function) - if function: - # We do have a function, print some info for the function - print(function) - # Now get all instructions for this function and print them - insts = function.GetInstructions(target) - disassemble_instructions(insts) - else: - # See if we have a symbol in the symbol table for where we stopped - symbol = frame.GetSymbol() - if symbol: - # We do have a symbol, print some info for the symbol - print(symbol) - -Writing lldb frame recognizers in Python ----------------------------------------- - -Frame recognizers allow for retrieving information about special frames based -on ABI, arguments or other special properties of that frame, even without -source code or debug info. Currently, one use case is to extract function -arguments that would otherwise be inaccessible, or augment existing arguments. - -Adding a custom frame recognizer is done by implementing a Python class and -using the 'frame recognizer add' command. The Python class should have a -'get_recognized_arguments' method and it will receive an argument of type -lldb.SBFrame representing the current frame that we are trying to recognize. -The method should return a (possibly empty) list of lldb.SBValue objects that -represent the recognized arguments. 
- -An example of a recognizer that retrieves the file descriptor values from libc -functions 'read', 'write' and 'close' follows: - -:: - - class LibcFdRecognizer(object): - def get_recognized_arguments(self, frame): - if frame.name in ["read", "write", "close"]: - fd = frame.EvaluateExpression("$arg1").unsigned - target = frame.thread.process.target - value = target.CreateValueFromExpression("fd", "(int)%d" % fd) - return [value] - return [] - -The file containing this implementation can be imported via ``command script import`` -and then we can register this recognizer with ``frame recognizer add``. -It's important to restrict the recognizer to the libc library (which is -libsystem_kernel.dylib on macOS) to avoid matching functions with the same name -in other modules: - -:: - - (lldb) command script import .../fd_recognizer.py - (lldb) frame recognizer add -l fd_recognizer.LibcFdRecognizer -n read -s libsystem_kernel.dylib - -When the program is stopped at the beginning of the 'read' function in libc, we can view the recognizer arguments in 'frame variable': - -:: - - (lldb) b read - (lldb) r - Process 1234 stopped - * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.3 - frame #0: 0x00007fff06013ca0 libsystem_kernel.dylib`read - (lldb) frame variable - (int) fd = 3 - -Writing Target Stop-Hooks in Python ------------------------------------ - -Stop hooks fire whenever the process stops just before control is returned to the -user. Stop hooks can either be a set of lldb command-line commands, or can -be implemented by a suitably defined Python class. The Python-based stop-hooks -can also be passed as a set of -key -value pairs when they are added, and those -will get packaged up into a SBStructuredData Dictionary and passed to the -constructor of the Python object managing the stop hook. This allows for -parameterization of the stop hooks. 
- -To add a Python-based stop hook, first define a class with the following methods: - -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| Name | Arguments | Description | -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| ``__init__`` | ``target: lldb.SBTarget`` | This is the constructor for the new stop-hook. | -| | ``extra_args: lldb.SBStructuredData`` | | -| | | | -| | | ``target`` is the SBTarget to which the stop hook is added. | -| | | | -| | | ``extra_args`` is an SBStructuredData object that the user can pass in when creating instances of this | -| | | breakpoint. It is not required, but allows for reuse of stop-hook classes. | -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ -| ``handle_stop`` | ``exe_ctx: lldb.SBExecutionContext`` | This is the called when the target stops. | -| | ``stream: lldb.SBStream`` | | -| | | ``exe_ctx`` argument will be filled with the current stop point for which the stop hook is | -| | | being evaluated. | -| | | | -| | | ``stream`` an lldb.SBStream, anything written to this stream will be written to the debugger console. | -| | | | -| | | The return value is a "Should Stop" vote from this thread. If the method returns either True or no return | -| | | this thread votes to stop. If it returns False, then the thread votes to continue after all the stop-hooks | -| | | are evaluated. | -| | | Note, the --auto-continue flag to 'target stop-hook add' overrides a True return value from the method. 
| -+--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ - -To use this class in lldb, run the command: - -:: - - (lldb) command script import MyModule.py - (lldb) target stop-hook add -P MyModule.MyStopHook -k first -v 1 -k second -v 2 - -where MyModule.py is the file containing the class definition MyStopHook. +Python Tutorials +----------------- + +The following tutorials and documentation demonstrate various Python capabilities within LLDB: + +.. toctree:: + :maxdepth: 1 + + tutorials/accessing-documentation + tutorials/python-embedded-interpreter + tutorials/script-driven-debugging + tutorials/breakpoint-triggered-scripts + tutorials/creating-custom-breakpoints + tutorials/automating-stepping-logic + tutorials/writing-custom-commands + tutorials/implementing-standalone-scripts + tutorials/custom-frame-recognizers + tutorials/extending-target-stop-hooks \ No newline at end of file diff --git a/lldb/docs/use/python.rst b/lldb/docs/use/python.rst deleted file mode 100644 index 3a919f2a8cdb1..0000000000000 --- a/lldb/docs/use/python.rst +++ /dev/null @@ -1,799 +0,0 @@ -Python Scripting -================ - -LLDB has been structured from the beginning to be scriptable in two -ways -- a Unix Python session can initiate/run a debug session -non-interactively using LLDB; and within the LLDB debugger tool, Python -scripts can be used to help with many tasks, including inspecting -program data, iterating over containers and determining if a breakpoint -should stop execution or continue. This document will show how to do -some of these things by going through an example, explaining how to use -Python scripting to find a bug in a program that searches for text in a -large binary tree. 
- -The Test Program and Input --------------------------- - -We have a simple C program (dictionary.c) that reads in a text file, -and stores all the words from the file in a Binary Search Tree, sorted -alphabetically. It then enters a loop prompting the user for a word, -searching for the word in the tree (using Binary Search), and reporting -to the user whether or not it found the word in the tree. - -The input text file we are using to test our program contains the text -for William Shakespeare's famous tragedy "Romeo and Juliet". - -The Bug -------- - -When we try running our program, we find there is a problem. While it -successfully finds some of the words we would expect to find, such as -"love" or "sun", it fails to find the word "Romeo", which MUST be in -the input text file: - -:: - - $ ./dictionary Romeo-and-Juliet.txt - Dictionary loaded. - Enter search word: love - Yes! - Enter search word: sun - Yes! - Enter search word: Romeo - No! - Enter search word: ^D - $ - -Using Depth First Search ------------------------- - -Our first job is to determine if the word "Romeo" actually got inserted -into the tree or not. Since "Romeo and Juliet" has thousands of words, -trying to examine our binary search tree by hand is completely -impractical. Therefore we will write a Python script to search the tree -for us. We will write a recursive Depth First Search function that -traverses the entire tree searching for a word, and maintaining -information about the path from the root of the tree to the current -node. If it finds the word in the tree, it returns the path from the -root to the node containing the word. 
This is what our DFS function in -Python would look like, with line numbers added for easy reference in -later explanations: - -:: - - 1: def DFS (root, word, cur_path): - 2: root_word_ptr = root.GetChildMemberWithName ("word") - 3: left_child_ptr = root.GetChildMemberWithName ("left") - 4: right_child_ptr = root.GetChildMemberWithName ("right") - 5: root_word = root_word_ptr.GetSummary() - 6: end = len (root_word) - 1 - 7: if root_word[0] == '"' and root_word[end] == '"': - 8: root_word = root_word[1:end] - 9: end = len (root_word) - 1 - 10: if root_word[0] == '\'' and root_word[end] == '\'': - 11: root_word = root_word[1:end] - 12: if root_word == word: - 13: return cur_path - 14: elif word < root_word: - 15: if left_child_ptr.GetValue() is None: - 16: return "" - 17: else: - 18: cur_path = cur_path + "L" - 19: return DFS (left_child_ptr, word, cur_path) - 20: else: - 21: if right_child_ptr.GetValue() is None: - 22: return "" - 23: else: - 24: cur_path = cur_path + "R" - 25: return DFS (right_child_ptr, word, cur_path) - - -Accessing & Manipulating Program Variables ------------------------------------------- - -Before we can call any Python function on any of our program's -variables, we need to get the variable into a form that Python can -access. To show you how to do this we will look at the parameters for -the DFS function. The first parameter is going to be a node in our -binary search tree, put into a Python variable. The second parameter is -the word we are searching for (a string), and the third parameter is a -string representing the path from the root of the tree to our current -node. - -The most interesting parameter is the first one, the Python variable -that needs to contain a node in our search tree. How can we take a -variable out of our program and put it into a Python variable? What -kind of Python variable will it be? The answers are to use the LLDB API -functions, provided as part of the LLDB Python module. 
Running Python -from inside LLDB, LLDB will automatically give us our current frame -object as a Python variable, "lldb.frame". This variable has the type -`SBFrame` (see the LLDB API for more information about `SBFrame` -objects). One of the things we can do with a frame object, is to ask it -to find and return its local variable. We will call the API function -`SBFrame.FindVariable` on the lldb.frame object to give us our dictionary -variable as a Python variable: - -:: - - root = lldb.frame.FindVariable ("dictionary") - -The line above, executed in the Python script interpreter in LLDB, asks the -current frame to find the variable named "dictionary" and return it. We then -store the returned value in the Python variable named "root". This answers the -question of HOW to get the variable, but it still doesn't explain WHAT actually -gets put into "root". If you examine the LLDB API, you will find that the -`SBFrame` method "FindVariable" returns an object of type `SBValue`. `SBValue` -objects are used, among other things, to wrap up program variables and values. -There are many useful methods defined in the `SBValue` class to allow you to get -information or children values out of SBValues. For complete information, see -the header file SBValue.h. The `SBValue` methods that we use in our DFS function -are ``GetChildMemberWithName()``, ``GetSummary()``, and ``GetValue()``. - - -Explaining DFS Script in Detail -------------------------------- - -Before diving into the details of this code, it would be best to give a -high-level overview of what it does. The nodes in our binary search tree were -defined to have type ``tree_node *``, which is defined as: - -:: - - typedef struct tree_node - { - const char *word; - struct tree_node *left; - struct tree_node *right; - } tree_node; - -Lines 2-11 of DFS are getting data out of the current tree node and getting -ready to do the actual search; lines 12-25 are the actual depth-first search. 
-Lines 2-4 of our DFS function get the word, left and right fields out of the -current node and store them in Python variables. Since root_word_ptr is a -pointer to our word, and we want the actual word, line 5 calls GetSummary() to -get a string containing the value out of the pointer. Since GetSummary() adds -quotes around its result, lines 6-11 strip surrounding quotes off the word. - -Line 12 checks to see if the word in the current node is the one we are -searching for. If so, we are done, and line 13 returns the current path. -Otherwise, line 14 checks to see if we should go left (search word comes before -the current word). If we decide to go left, line 15 checks to see if the left -pointer child is NULL ("None" is the Python equivalent of NULL). If the left -pointer is NULL, then the word is not in this tree and we return an empty path -(line 16). Otherwise, we add an "L" to the end of our current path string, to -indicate we are going left (line 18), and then recurse on the left child (line -19). Lines 20-25 are the same as lines 14-19, except for going right rather -than going left. - -One other note: Typing something as long as our DFS function directly into the -interpreter can be difficult, as making a single typing mistake means having to -start all over. Therefore we recommend doing as we have done: Writing your -longer, more complicated script functions in a separate file (in this case -tree_utils.py) and then importing it into your LLDB Python interpreter. - - -The DFS Script in Action ------------------------- - -At this point we are ready to use the DFS function to see if the word "Romeo" -is in our tree or not. To actually use it in LLDB on our dictionary program, -you would do something like this: - -:: - - $ lldb - (lldb) process attach -n "dictionary" - Architecture set to: x86_64. 
- Process 521 stopped - * thread #1: tid = 0x2c03, 0x00007fff86c8bea0 libSystem.B.dylib`read$NOCANCEL + 8, stop reason = signal SIGSTOP - frame #0: 0x00007fff86c8bea0 libSystem.B.dylib`read$NOCANCEL + 8 - (lldb) breakpoint set -n find_word - Breakpoint created: 1: name = 'find_word', locations = 1, resolved = 1 - (lldb) continue - Process 521 resuming - Process 521 stopped - * thread #1: tid = 0x2c03, 0x0000000100001830 dictionary`find_word + 16 - at dictionary.c:105, stop reason = breakpoint 1.1 - frame #0: 0x0000000100001830 dictionary`find_word + 16 at dictionary.c:105 - 102 int - 103 find_word (tree_node *dictionary, char *word) - 104 { - -> 105 if (!word || !dictionary) - 106 return 0; - 107 - 108 int compare_value = strcmp (word, dictionary->word); - (lldb) script - Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. - >>> import tree_utils - >>> root = lldb.frame.FindVariable ("dictionary") - >>> current_path = "" - >>> path = tree_utils.DFS (root, "Romeo", current_path) - >>> print path - LLRRL - >>> ^D - (lldb) - -The first bit of code above shows starting lldb, attaching to the dictionary -program, and getting to the find_word function in LLDB. The interesting part -(as far as this example is concerned) begins when we enter the script command -and drop into the embedded interactive Python interpreter. We will go over this -Python code line by line. The first line - -:: - - import tree_utils - - -imports the file where we wrote our DFS function, tree_utils.py, into Python. -Notice that to import the file we leave off the ".py" extension. We can now -call any function in that file, giving it the prefix "tree_utils.", so that -Python knows where to look for the function. The line - -:: - - root = lldb.frame.FindVariable ("dictionary") - - -gets our program variable "dictionary" (which contains the binary search tree) -and puts it into the Python variable "root". 
See Accessing & Manipulating -Program Variables in Python above for more details about how this works. The -next line is - -:: - - current_path = "" - -This line initializes the current_path from the root of the tree to our current -node. Since we are starting at the root of the tree, our current path starts as -an empty string. As we go right and left through the tree, the DFS function -will append an 'R' or an 'L' to the current path, as appropriate. The line - -:: - - path = tree_utils.DFS (root, "Romeo", current_path) - -calls our DFS function (prefixing it with the module name so that Python can -find it). We pass in our binary tree stored in the variable root, the word we -are searching for, and our current path. We assign whatever path the DFS -function returns to the Python variable path. - -Finally, we want to see if the word was found or not, and if so we want to see -the path through the tree to the word. So we do - -:: - - print path - -From this we can see that the word "Romeo" was indeed found in the tree, and -the path from the root of the tree to the node containing "Romeo" is -left-left-right-right-left. - -Using Breakpoint Command Scripts --------------------------------- - -We are halfway to figuring out what the problem is. We know the word we are -looking for is in the binary tree, and we know exactly where it is in the -binary tree. Now we need to figure out why our binary search algorithm is not -finding the word. We will do this using breakpoint command scripts. - -The idea is as follows. The binary search algorithm has two main decision -points: the decision to follow the right branch; and, the decision to follow -the left branch. We will set a breakpoint at each of these decision points, and -attach a Python breakpoint command script to each breakpoint. The breakpoint -commands will use the global path Python variable that we got from our DFS -function. 
Each time one of these decision breakpoints is hit, the script will -compare the actual decision with the decision the front of the path variable -says should be made (the first character of the path). If the actual decision -and the path agree, then the front character is stripped off the path, and -execution is resumed. In this case the user never even sees the breakpoint -being hit. But if the decision differs from what the path says it should be, -then the script prints out a message and does NOT resume execution, leaving the -user sitting at the first point where a wrong decision is being made. - -Python Breakpoint Command Scripts Are Not What They Seem --------------------------------------------------------- - -What do we mean by that? When you enter a Python breakpoint command in LLDB, it -appears that you are entering one or more plain lines of Python. BUT LLDB then -takes what you entered and wraps it into a Python FUNCTION (just like using the -"def" Python command). It automatically gives the function an obscure, unique, -hard-to-stumble-across function name, and gives it two parameters: frame and -bp_loc. When the breakpoint gets hit, LLDB wraps up the frame object where the -breakpoint was hit, and the breakpoint location object for the breakpoint that -was hit, and puts them into Python variables for you. It then calls the Python -function that was created for the breakpoint command, and passes in the frame -and breakpoint location objects. - -So, being practical, what does this mean for you when you write your Python -breakpoint commands? It means that there are two things you need to keep in -mind: 1. If you want to access any Python variables created outside your -script, you must declare such variables to be global. If you do not declare -them as global, then the Python function will treat them as local variables, -and you will get unexpected behavior. 2. All Python breakpoint command scripts -automatically have a frame and a bp_loc variable. 
The variables are pre-loaded -by LLDB with the correct context for the breakpoint. You do not have to use -these variables, but they are there if you want them. - -The Decision Point Breakpoint Commands --------------------------------------- - -This is what the Python breakpoint command script would look like for the -decision to go right: - -:: - - global path - if path[0] == 'R': - path = path[1:] - thread = frame.GetThread() - process = thread.GetProcess() - process.Continue() - else: - print "Here is the problem; going right, should go left!" - - -Just as a reminder, LLDB is going to take this script and wrap it up in a function, like this: - -:: - - def some_unique_and_obscure_function_name (frame, bp_loc): - global path - if path[0] == 'R': - path = path[1:] - thread = frame.GetThread() - process = thread.GetProcess() - process.Continue() - else: - print "Here is the problem; going right, should go left!" - -LLDB will call the function, passing in the correct frame and breakpoint -location whenever the breakpoint gets hit. There are several things to notice -about this function. The first one is that we are accessing and updating a -piece of state (the path variable), and actually conditioning our behavior -based upon this variable. Since the variable was defined outside of our script -(and therefore outside of the corresponding function) we need to tell Python -that we are accessing a global variable. That is what the first line of the -script does. Next we check where the path says we should go and compare it to -our decision (recall that we are at the breakpoint for the decision to go -right). If the path agrees with our decision, then we strip the first character -off of the path. - -Since the decision matched the path, we want to resume execution. To do this we -make use of the frame parameter that LLDB guarantees will be there for us. 
We -use LLDB API functions to get the current thread from the current frame, and -then to get the process from the thread. Once we have the process, we tell it -to resume execution (using the Continue() API function). - -If the decision to go right does not agree with the path, then we do not resume -execution. We allow the breakpoint to remain stopped (by doing nothing), and we -print an informational message telling the user we have found the problem, and -what the problem is. - -Actually Using The Breakpoint Commands --------------------------------------- - -Now we will look at what happens when we actually use these breakpoint commands -on our program. Doing a source list -n find_word shows us the function -containing our two decision points. Looking at the code below, we see that we -want to set our breakpoints on lines 113 and 115: - -:: - - (lldb) source list -n find_word - File: /Volumes/Data/HD2/carolinetice/Desktop/LLDB-Web-Examples/dictionary.c. - 101 - 102 int - 103 find_word (tree_node *dictionary, char *word) - 104 { - 105 if (!word || !dictionary) - 106 return 0; - 107 - 108 int compare_value = strcmp (word, dictionary->word); - 109 - 110 if (compare_value == 0) - 111 return 1; - 112 else if (compare_value < 0) - 113 return find_word (dictionary->left, word); - 114 else - 115 return find_word (dictionary->right, word); - 116 } - 117 - - -So, we set our breakpoints, enter our breakpoint command scripts, and see what happens: - -:: - - (lldb) breakpoint set -l 113 - Breakpoint created: 2: file ='dictionary.c', line = 113, locations = 1, resolved = 1 - (lldb) breakpoint set -l 115 - Breakpoint created: 3: file ='dictionary.c', line = 115, locations = 1, resolved = 1 - (lldb) breakpoint command add -s python 2 - Enter your Python command(s). Type 'DONE' to end. - > global path - > if (path[0] == 'L'): - > path = path[1:] - > thread = frame.GetThread() - > process = thread.GetProcess() - > process.Continue() - > else: - > print "Here is the problem. 
Going left, should go right!" - > DONE - (lldb) breakpoint command add -s python 3 - Enter your Python command(s). Type 'DONE' to end. - > global path - > if (path[0] == 'R'): - > path = path[1:] - > thread = frame.GetThread() - > process = thread.GetProcess() - > process.Continue() - > else: - > print "Here is the problem. Going right, should go left!" - > DONE - (lldb) continue - Process 696 resuming - Here is the problem. Going right, should go left! - Process 696 stopped - * thread #1: tid = 0x2d03, 0x000000010000189f dictionary`find_word + 127 at dictionary.c:115, stop reason = breakpoint 3.1 - frame #0: 0x000000010000189f dictionary`find_word + 127 at dictionary.c:115 - 112 else if (compare_value < 0) - 113 return find_word (dictionary->left, word); - 114 else - -> 115 return find_word (dictionary->right, word); - 116 } - 117 - 118 void - (lldb) - - -After setting our breakpoints, adding our breakpoint commands and continuing, -we run for a little bit and then hit one of our breakpoints, printing out the -error message from the breakpoint command. Apparently at this point in the -tree, our search algorithm decided to go right, but our path says the node we -want is to the left. Examining the word at the node where we stopped, and our -search word, we see: - -:: - - (lldb) expr dictionary->word - (const char *) $1 = 0x0000000100100080 "dramatis" - (lldb) expr word - (char *) $2 = 0x00007fff5fbff108 "romeo" - -So the word at our current node is "dramatis", and the word we are searching -for is "romeo". "romeo" comes after "dramatis" alphabetically, so it seems like -going right would be the correct decision. Let's ask Python what it thinks the -path from the current node to our word is: - -:: - - (lldb) script print path - LLRRL - -According to Python we need to go left-left-right-right-left from our current -node to find the word we are looking for. 
Let's double check our tree, and see -what word it has at that node: - -:: - - (lldb) expr dictionary->left->left->right->right->left->word - (const char *) $4 = 0x0000000100100880 "Romeo" - -So the word we are searching for is "romeo" and the word at our DFS location is -"Romeo". Aha! One is uppercase and the other is lowercase: We seem to have a -case conversion problem somewhere in our program (we do). - -This is the end of our example on how you might use Python scripting in LLDB to -help you find bugs in your program. - -Source Files for The Example ----------------------------- - -The complete code for the Dictionary program (with case-conversion bug), the -DFS function and other Python script examples (tree_utils.py) used for this -example are available below. - -tree_utils.py - Example Python functions using LLDB's API, including DFS - -:: - - """ - # ===-- tree_utils.py ---------------------------------------*- Python -*-===// - # - # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - # See https://llvm.org/LICENSE.txt for license information. - # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - # - # ===----------------------------------------------------------------------===// - - tree_utils.py - A set of functions for examining binary - search trees, based on the example search tree defined in - dictionary.c. These functions contain calls to LLDB API - functions, and assume that the LLDB Python module has been - imported. - - For a thorough explanation of how the DFS function works, and - for more information about dictionary.c go to - http://lldb.llvm.org/scripting.html - """ - - - def DFS(root, word, cur_path): - """ - Recursively traverse a binary search tree containing - words sorted alphabetically, searching for a particular - word in the tree. Also maintains a string representing - the path from the root of the tree to the current node. - If the word is found in the tree, return the path string. 
- Otherwise return an empty string. - - This function assumes the binary search tree is - the one defined in dictionary.c It uses LLDB API - functions to examine and traverse the tree nodes. - """ - - # Get pointer field values out of node 'root' - - root_word_ptr = root.GetChildMemberWithName("word") - left_child_ptr = root.GetChildMemberWithName("left") - right_child_ptr = root.GetChildMemberWithName("right") - - # Get the word out of the word pointer and strip off - # surrounding quotes (added by call to GetSummary). - - root_word = root_word_ptr.GetSummary() - end = len(root_word) - 1 - if root_word[0] == '"' and root_word[end] == '"': - root_word = root_word[1:end] - end = len(root_word) - 1 - if root_word[0] == '\'' and root_word[end] == '\'': - root_word = root_word[1:end] - - # Main depth first search - - if root_word == word: - return cur_path - elif word < root_word: - - # Check to see if left child is NULL - - if left_child_ptr.GetValue() is None: - return "" - else: - cur_path = cur_path + "L" - return DFS(left_child_ptr, word, cur_path) - else: - - # Check to see if right child is NULL - - if right_child_ptr.GetValue() is None: - return "" - else: - cur_path = cur_path + "R" - return DFS(right_child_ptr, word, cur_path) - - - def tree_size(root): - """ - Recursively traverse a binary search tree, counting - the nodes in the tree. Returns the final count. - - This function assumes the binary search tree is - the one defined in dictionary.c It uses LLDB API - functions to examine and traverse the tree nodes. - """ - if (root.GetValue is None): - return 0 - - if (int(root.GetValue(), 16) == 0): - return 0 - - left_size = tree_size(root.GetChildAtIndex(1)) - right_size = tree_size(root.GetChildAtIndex(2)) - - total_size = left_size + right_size + 1 - return total_size - - - def print_tree(root): - """ - Recursively traverse a binary search tree, printing out - the words at the nodes in alphabetical order (the - search order for the binary tree). 
- - This function assumes the binary search tree is - the one defined in dictionary.c It uses LLDB API - functions to examine and traverse the tree nodes. - """ - if (root.GetChildAtIndex(1).GetValue() is not None) and ( - int(root.GetChildAtIndex(1).GetValue(), 16) != 0): - print_tree(root.GetChildAtIndex(1)) - - print root.GetChildAtIndex(0).GetSummary() - - if (root.GetChildAtIndex(2).GetValue() is not None) and ( - int(root.GetChildAtIndex(2).GetValue(), 16) != 0): - print_tree(root.GetChildAtIndex(2)) - - -dictionary.c - Sample dictionary program, with bug - -:: - - //===-- dictionary.c ---------------------------------------------*- C -*-===// - // - // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information. - // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - // - //===----------------------------------------------------------------------===// - #include - #include - #include - #include - - typedef struct tree_node { - const char *word; - struct tree_node *left; - struct tree_node *right; - } tree_node; - - /* Given a char*, returns a substring that starts at the first - alphabet character and ends at the last alphabet character, i.e. it - strips off beginning or ending quotes, punctuation, etc. */ - - char *strip(char **word) { - char *start = *word; - int len = strlen(start); - char *end = start + len - 1; - - while ((start < end) && (!isalpha(start[0]))) - start++; - - while ((end > start) && (!isalpha(end[0]))) - end--; - - if (start > end) - return NULL; - - end[1] = '\0'; - *word = start; - - return start; - } - - /* Given a binary search tree (sorted alphabetically by the word at - each node), and a new word, inserts the word at the appropriate - place in the tree. 
*/ - - void insert(tree_node *root, char *word) { - if (root == NULL) - return; - - int compare_value = strcmp(word, root->word); - - if (compare_value == 0) - return; - - if (compare_value < 0) { - if (root->left != NULL) - insert(root->left, word); - else { - tree_node *new_node = (tree_node *)malloc(sizeof(tree_node)); - new_node->word = strdup(word); - new_node->left = NULL; - new_node->right = NULL; - root->left = new_node; - } - } else { - if (root->right != NULL) - insert(root->right, word); - else { - tree_node *new_node = (tree_node *)malloc(sizeof(tree_node)); - new_node->word = strdup(word); - new_node->left = NULL; - new_node->right = NULL; - root->right = new_node; - } - } - } - - /* Read in a text file and storea all the words from the file in a - binary search tree. */ - - void populate_dictionary(tree_node **dictionary, char *filename) { - FILE *in_file; - char word[1024]; - - in_file = fopen(filename, "r"); - if (in_file) { - while (fscanf(in_file, "%s", word) == 1) { - char *new_word = (strdup(word)); - new_word = strip(&new_word); - if (*dictionary == NULL) { - tree_node *new_node = (tree_node *)malloc(sizeof(tree_node)); - new_node->word = new_word; - new_node->left = NULL; - new_node->right = NULL; - *dictionary = new_node; - } else - insert(*dictionary, new_word); - } - } - } - - /* Given a binary search tree and a word, search for the word - in the binary search tree. */ - - int find_word(tree_node *dictionary, char *word) { - if (!word || !dictionary) - return 0; - - int compare_value = strcmp(word, dictionary->word); - - if (compare_value == 0) - return 1; - else if (compare_value < 0) - return find_word(dictionary->left, word); - else - return find_word(dictionary->right, word); - } - - /* Print out the words in the binary search tree, in sorted order. 
*/ - - void print_tree(tree_node *dictionary) { - if (!dictionary) - return; - - if (dictionary->left) - print_tree(dictionary->left); - - printf("%s\n", dictionary->word); - - if (dictionary->right) - print_tree(dictionary->right); - } - - int main(int argc, char **argv) { - tree_node *dictionary = NULL; - char buffer[1024]; - char *filename; - int done = 0; - - if (argc == 2) - filename = argv[1]; - - if (!filename) - return -1; - - populate_dictionary(&dictionary, filename); - fprintf(stdout, "Dictionary loaded.\nEnter search word: "); - while (!done && fgets(buffer, sizeof(buffer), stdin)) { - char *word = buffer; - int len = strlen(word); - int i; - - for (i = 0; i < len; ++i) - word[i] = tolower(word[i]); - - if ((len > 0) && (word[len - 1] == '\n')) { - word[len - 1] = '\0'; - len = len - 1; - } - - if (find_word(dictionary, word)) - fprintf(stdout, "Yes!\n"); - else - fprintf(stdout, "No!\n"); - - fprintf(stdout, "Enter search word: "); - } - - fprintf(stdout, "\n"); - return 0; - } - - -The text for "Romeo and Juliet" can be obtained from the Gutenberg Project -(http://www.gutenberg.org). - diff --git a/lldb/docs/use/tutorials/accessing-documentation.md b/lldb/docs/use/tutorials/accessing-documentation.md new file mode 100644 index 0000000000000..d14efa5f3c428 --- /dev/null +++ b/lldb/docs/use/tutorials/accessing-documentation.md @@ -0,0 +1,62 @@ +# Accessing Script Documentation + +The LLDB API is contained in a python module named lldb. A useful resource when +writing Python extensions is the lldb Python classes reference guide. + +The documentation is also accessible in an interactive debugger session with +the following command: + +```python3 +(lldb) script help(lldb) + Help on package lldb: + + NAME + lldb - The lldb module contains the public APIs for Python binding. + + FILE + /System/Library/PrivateFrameworks/LLDB.framework/Versions/A/Resources/Python/lldb/__init__.py + + DESCRIPTION +... +``` + +You can also get help using a module class name. 
The full API that is exposed +for that class will be displayed in a man page style window. Below we want to +get help on the lldb.SBFrame class: + +```python3 +(lldb) script help(lldb.SBFrame) + Help on class SBFrame in module lldb: + + class SBFrame(builtins.object) + | SBFrame(*args) + | + | Represents one of the stack frames associated with a thread. + | + | SBThread contains SBFrame(s). For example (from test/lldbutil.py), :: + | + | def print_stacktrace(thread, string_buffer = False): + | '''Prints a simple stack trace of this thread.''' +... +``` + +Or you can get help using any python object, here we use the lldb.process +object which is a global variable in the lldb module which represents the +currently selected process: + +```python3 +(lldb) script help(lldb.process) + Help on SBProcess in module lldb object: + + class SBProcess(builtins.object) + | SBProcess(*args) + | + | Represents the process associated with the target program. + | + | SBProcess supports thread iteration. For example (from test/lldbutil.py), :: + | + | # ================================================== + | # Utility functions related to Threads and Processes + | # ================================================== +... +``` \ No newline at end of file diff --git a/lldb/docs/use/tutorials/automating-stepping-logic.md b/lldb/docs/use/tutorials/automating-stepping-logic.md new file mode 100644 index 0000000000000..564d3ec1f14d4 --- /dev/null +++ b/lldb/docs/use/tutorials/automating-stepping-logic.md @@ -0,0 +1,42 @@ +# Automating Stepping Logic + +A slightly esoteric use of the Python API's is to construct custom stepping +types. LLDB's stepping is driven by a stack of "thread plans" and a fairly +simple state machine that runs the plans. You can create a Python class that +works as a thread plan, and responds to the requests the state machine makes to +run its operations. 
+ +The base class for the [ScriptedThreadPlan](https://lldb.llvm.org/python_api/lldb.plugins.scripted_thread_plan.ScriptedThreadPlan.html) is provided as part of the lldb python module, making it easy to derive a new class from it. + +There is a longer discussion of scripted thread plans and the state machine, +and several interesting examples of their use in [scripted_step.py](https://github.com/llvm/llvm-project/blob/main/lldb/examples/python/scripted_step.py) +and for a **MUCH** fuller discussion of the whole state machine, see [ThreadPlan.h](https://github.com/llvm/llvm-project/blob/main/lldb/include/lldb/Target/ThreadPlan.h) + +If you are reading those comments it is useful to know that scripted thread +plans are set to be either ***"ControllingPlans"*** or ***"OkayToDiscard"***. + +To implement a scripted step, you define a python class that has the following +methods: + +| Name | Arguments | Description | +|------|-----------|-------------| +| `__init__` | `thread_plan`: `lldb.SBThreadPlan` | This is the underlying `SBThreadPlan` that is pushed onto the plan stack. You will want to store this away in an ivar. Also, if you are going to use one of the canned thread plans, you can queue it at this point. | +| `explains_stop` | `event`: `lldb.SBEvent` | Return True if this stop is part of your thread plans logic, false otherwise. | +| `is_stale` | `None` | If your plan is no longer relevant (for instance, you were stepping in a particular stack frame, but some other operation pushed that frame off the stack) return True and your plan will get popped. | +| `should_step` | `None` | Return `True` if you want lldb to instruction step one instruction, or False to continue till the next breakpoint is hit. | +| `should_stop` | `event`: `lldb.SBEvent` | If your plan wants to stop and return control to the user at this point, return True. If your plan is done at this point, call SetPlanComplete on your thread plan instance. 
Also, do any work you need here to set up the next stage of stepping. | + +To use this class to implement a step, use the command: + +```python3 +(lldb) thread step-scripted -C MyModule.MyStepPlanClass +``` + +Or use the `SBThread.StepUsingScriptedThreadPlan` API. The `SBThreadPlan` passed +into your `__init__` function can also push several common plans (step +in/out/over and run-to-address) in front of itself on the stack, which can be +used to compose more complex stepping operations. When you use subsidiary plans +your explains_stop and should_stop methods won't get called until the +subsidiary plan is done, or the process stops for an event the subsidiary plan +doesn't explain. For instance, step over plans don't explain a breakpoint hit +while performing the step-over. \ No newline at end of file diff --git a/lldb/docs/use/tutorials/breakpoint-triggered-scripts.md b/lldb/docs/use/tutorials/breakpoint-triggered-scripts.md new file mode 100644 index 0000000000000..0cd9f945f0d11 --- /dev/null +++ b/lldb/docs/use/tutorials/breakpoint-triggered-scripts.md @@ -0,0 +1,85 @@ +# Breakpoint-Triggered Scripts + +One very powerful use of the lldb Python API is to have a python script run +when a breakpoint gets hit. Adding python scripts to breakpoints provides a way +to create complex breakpoint conditions and also allows for smart logging and +data gathering. + +When your process hits a breakpoint to which you have attached some python +code, the code is executed as the body of a function which takes three +arguments: + +```python3 +def breakpoint_function_wrapper(frame, bp_loc, internal_dict): + # Your code goes here +``` + +or: + +```python3 +def breakpoint_function_wrapper(frame, bp_loc, extra_args, internal_dict): + # Your code goes here +``` + +| Argument | Type | Description | +|----------|------|-------------| +| `frame` | `lldb.SBFrame` | The current stack frame where the breakpoint got hit. The object will always be valid. 
This `frame` argument might *not* match the currently selected stack frame found in the `lldb` module global variable `lldb.frame`. | +| `bp_loc` | `lldb.SBBreakpointLocation` | The breakpoint location that just got hit. Breakpoints are represented by `lldb.SBBreakpoint` objects. These breakpoint objects can have one or more locations. These locations are represented by `lldb.SBBreakpointLocation` objects. | +| `extra_args` | `lldb.SBStructuredData` | **Optional** If your breakpoint callback function takes this extra parameter, then when the callback gets added to a breakpoint, its contents can parametrize this use of the callback. For instance, instead of writing a callback that stops when the caller is "Foo", you could take the function name from a field in the `extra_args`, making the callback more general. The `-k` and `-v` options to `breakpoint command add` will be passed as a Dictionary in the `extra_args` parameter, or you can provide it with the SB API's. | +| `internal_dict` | `dict` | The python session dictionary as a standard python dictionary object. | + +Optionally, a Python breakpoint command can return a value. Returning `False` +tells LLDB that you do not want to stop at the breakpoint. Any other return +value (including None or leaving out the return statement altogether) is akin +to telling LLDB to actually stop at the breakpoint. This can be useful in +situations where a breakpoint only needs to stop the process when certain +conditions are met, and you do not want to inspect the program state manually +at every stop and then continue. + +An example will show how simple it is to write some python code and attach it +to a breakpoint. The following example will allow you to track the order in +which the functions in a given shared library are first executed during one run +of your program. This is a simple method to gather an order file which can be +used to optimize function placement within a binary for execution locality. 
+ +We do this by setting a regular expression breakpoint that will match every +function in the shared library. The regular expression '.' will match any +string that has at least one character in it, so we will use that. This will +result in one lldb.SBBreakpoint object that contains an +lldb.SBBreakpointLocation object for each function. As the breakpoint gets hit, +we use a counter to track the order in which the function at this particular +breakpoint location got hit. Since our code is passed the location that was +hit, we can get the name of the function from the location, disable the +location so we won't count this function again; then log some info and continue +the process. + +Note we also have to initialize our counter, which we do with the simple +one-line version of the script command. + +Here is the code: + +```python3 +(lldb) breakpoint set --func-regex=. --shlib=libfoo.dylib +Breakpoint created: 1: regex = '.', module = libfoo.dylib, locations = 223 +(lldb) script counter = 0 +(lldb) breakpoint command add --script-type python 1 +Enter your Python command(s). Type 'DONE' to end. +> # Increment our counter. Since we are in a function, this must be a global python variable +> global counter +> counter += 1 +> # Get the name of the function +> name = frame.GetFunctionName() +> # Print the order and the function name +> print('[%i] %s' % (counter, name)) +> # Disable the current breakpoint location so it doesn't get hit again +> bp_loc.SetEnabled(False) +> # No need to stop here +> return False +> DONE +``` + +The breakpoint command add command above attaches a python script to breakpoint 1. 
To remove the breakpoint command: + +```python3 +(lldb) breakpoint command delete 1 +``` \ No newline at end of file diff --git a/lldb/docs/use/tutorials/creating-custom-breakpoints.md b/lldb/docs/use/tutorials/creating-custom-breakpoints.md new file mode 100644 index 0000000000000..e3081c44e3650 --- /dev/null +++ b/lldb/docs/use/tutorials/creating-custom-breakpoints.md @@ -0,0 +1,128 @@ +# Custom Breakpoint Resolvers + +Another use of the Python API's in lldb is to create a custom breakpoint +resolver. + +It allows you to provide the algorithm which will be used in the breakpoint's +search of the space of the code in a given Target to determine where to set the +breakpoint locations - the actual places where the breakpoint will trigger. To +understand how this works you need to know a little about how lldb handles +breakpoints. + +In lldb, a breakpoint is composed of three parts: +1. the Searcher +2. the Resolver, +3. the Stop Options. + +The Searcher and Resolver cooperate to determine how breakpoint locations are +set and differ between each breakpoint type. Stop options determine what +happens when a location triggers and includes the commands, conditions, ignore +counts, etc. Stop options are common between all breakpoint types, so for our +purposes only the Searcher and Resolver are relevant. + +### Breakpoint Searcher + +The Searcher's job is to traverse in a structured way the code in the current +target. It proceeds from the Target, to search all the Modules in the Target, +in each Module it can recurse into the Compile Units in that module, and within +each Compile Unit it can recurse over the Functions it contains. + +The Searcher can be provided with a SearchFilter that it will use to restrict +this search. For instance, if the SearchFilter specifies a list of Modules, the +Searcher will not recurse into Modules that aren't on the list. When you pass +the -s modulename flag to break set you are creating a Module-based search +filter. 
When you pass -f filename.c to break set -n you are creating a file +based search filter. If neither of these is specified, the breakpoint will have +a no-op search filter, so all parts of the program are searched and all +locations accepted. + +### Breakpoint Resolver + +The Resolver has two functions: + +The most important one is the callback it provides. This will get called at the +appropriate time in the course of the search. The callback is where the job of +adding locations to the breakpoint gets done. + +The other function is specifying to the Searcher at what depth in the above +described recursion it wants to be called. Setting a search depth also provides +a stop for the recursion. For instance, if you request a Module depth search, +then the callback will be called for each Module as it gets added to the +Target, but the searcher will not recurse into the Compile Units in the module. + +One other slight subtlety is that the depth at which you get called back is not +necessarily the depth at which the SearchFilter is specified. For instance, +if you are doing symbol searches, it is convenient to use the Module depth for +the search, since symbols are stored in the module. But the SearchFilter might +specify some subset of CompileUnits, so not all the symbols you might find in +each module will pass the search. You don't need to handle this situation +yourself, since SBBreakpoint::AddLocation will only add locations that pass the +Search Filter. This API returns an SBError to inform you whether your location +was added. + +When the breakpoint is originally created, its Searcher will process all the +currently loaded modules. The Searcher will also visit any new modules as they +are added to the target. This happens, for instance, when a new shared library +gets added to the target in the course of running, or on rerunning if any of +the currently loaded modules have been changed. 
Note, in the latter case, all +the locations set in the old module will get deleted and you will be asked to +recreate them in the new version of the module when your callback gets called +with that module. For this reason, you shouldn't try to manage the locations +you add to the breakpoint yourself. Note that the Breakpoint takes care of +deduplicating equal addresses in AddLocation, so you shouldn't need to worry +about that anyway. + +### Scripted Breakpoint Resolver + +At present, when adding a ScriptedBreakpoint type, you can only provide a +custom Resolver, not a custom SearchFilter. + +The custom Resolver is provided as a Python class with the following methods: + +| Name | Arguments | Description | +|------|-----------|-------------| +| `__init__` | `bkpt`: `lldb.SBBreakpoint` `extra_args`: `lldb.SBStructuredData` | This is the constructor for the new Resolver. `bkpt` is the breakpoint owning this Resolver. `extra_args` is an `SBStructuredData` object that the user can pass in when creating instances of this breakpoint. It is not required, but is quite handy. For instance if you were implementing a breakpoint on some symbol name, you could write a generic symbol name based Resolver, and then allow the user to pass in the particular symbol in the extra_args | +| `__callback__` | `sym_ctx`: `lldb.SBSymbolContext` | This is the Resolver callback. The `sym_ctx` argument will be filled with the current stage of the search. For instance, if you asked for a search depth of lldb.eSearchDepthCompUnit, then the target, module and compile_unit fields of the sym_ctx will be filled. The callback should look just in the context passed in `sym_ctx` for new locations. If the callback finds an address of interest, it can add it to the breakpoint with the `SBBreakpoint.AddLocation` method, using the breakpoint passed in to the `__init__` method. | +| `__get_depth__` | `None` | Specify the depth at which you wish your callback to get called. 
The currently supported options are: `lldb.eSearchDepthModule` `lldb.eSearchDepthCompUnit` `lldb.eSearchDepthFunction` For instance, if you are looking up symbols, which are stored at the Module level, you will want to get called back module by module. So you would want to return `lldb.eSearchDepthModule`. This method is optional. If not provided the search will be done at Module depth. | +| `get_short_help` | `None` | This is an optional method. If provided, the returned string will be printed at the beginning of the description for this breakpoint. | + +To define a new breakpoint command defined by this class from the lldb command +line, use the command: + +``` +(lldb) breakpoint set -P MyModule.MyResolverClass +``` + +You can also populate the extra_args SBStructuredData with a dictionary of +key/value pairs with: + +``` +(lldb) breakpoint set -P MyModule.MyResolverClass -k key_1 -v value_1 -k key_2 -v value_2 +``` + +Although you can't write a scripted SearchFilter, both the command line and the +SB API's for adding a scripted resolver allow you to specify a SearchFilter +restricted to certain modules or certain compile units. When using the command +line to create the resolver, you can specify a Module specific SearchFilter by +passing the -s ModuleName option - which can be specified multiple times. You +can also specify a SearchFilter restricted to certain compile units by passing +in the -f CompUnitName option. This can also be specified more than once. And +you can mix the two to specify "this comp unit in this module". So, for +instance, + +``` +(lldb) breakpoint set -P MyModule.MyResolverClass -s a.out +``` + +will use your resolver, but will only recurse into or accept new locations in +the module a.out. + +Another option for creating scripted breakpoints is to use the +SBTarget.BreakpointCreateFromScript API. This one has the advantage that you +can pass in an arbitrary SBStructuredData object, so you can create more +complex parametrizations. 
SBStructuredData has a handy SetFromJSON method which
+you can use for this purpose. Your __init__ function gets passed this
+SBStructuredData object. This API also allows you to directly provide the list
+of Modules and the list of CompileUnits that will make up the SearchFilter. If
+you pass in empty lists, the breakpoint will use the default "search
+everywhere, accept everything" filter.
\ No newline at end of file
diff --git a/lldb/docs/use/tutorials/custom-frame-recognizers.md b/lldb/docs/use/tutorials/custom-frame-recognizers.md
new file mode 100644
index 0000000000000..17bf9637d9a85
--- /dev/null
+++ b/lldb/docs/use/tutorials/custom-frame-recognizers.md
@@ -0,0 +1,51 @@
+# Detecting Patterns With Recognizers
+
+Frame recognizers allow for retrieving information about special frames based
+on ABI, arguments or other special properties of that frame, even without
+source code or debug info. Currently, one use case is to extract function
+arguments that would otherwise be inaccessible, or augment existing arguments.
+
+Adding a custom frame recognizer is done by implementing a Python class and
+using the `frame recognizer add` command. The Python class should implement the
+`get_recognized_arguments` method and it will receive an argument of type
+`lldb.SBFrame` representing the current frame that we are trying to recognize.
+The method should return a (possibly empty) list of `lldb.SBValue` objects that
+represent the recognized arguments.
+ +An example of a recognizer that retrieves the file descriptor values from libc +functions 'read', 'write' and 'close' follows: + +```python3 +class LibcFdRecognizer: + def get_recognized_arguments(self, frame: lldb.SBFrame): + if frame.name in ["read", "write", "close"]: + fd = frame.EvaluateExpression("$arg1").unsigned + target = frame.thread.process.target + value = target.CreateValueFromExpression("fd", "(int)%d" % fd) + return [value] + return [] +``` + +The file containing this implementation can be imported via `command script import` +and then we can register this recognizer with `frame recognizer add`. + +It's important to restrict the recognizer to the libc library (which is +`libsystem_kernel.dylib` on macOS) to avoid matching functions with the same name +in other modules: + +```c++ +(lldb) command script import .../fd_recognizer.py +(lldb) frame recognizer add -l fd_recognizer.LibcFdRecognizer -n read -s libsystem_kernel.dylib +``` + +When the program is stopped at the beginning of the 'read' function in libc, we can view the recognizer arguments in 'frame variable': + +```c++ +(lldb) b read +(lldb) r +Process 1234 stopped +* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.3 + frame #0: 0x00007fff06013ca0 libsystem_kernel.dylib`read +(lldb) frame variable +(int) fd = 3 +``` \ No newline at end of file diff --git a/lldb/docs/use/tutorials/extending-target-stop-hooks.md b/lldb/docs/use/tutorials/extending-target-stop-hooks.md new file mode 100644 index 0000000000000..232187d0dcf11 --- /dev/null +++ b/lldb/docs/use/tutorials/extending-target-stop-hooks.md @@ -0,0 +1,25 @@ +# Extending Target Stop-Hooks + +Stop hooks fire whenever the process stops just before control is returned to the +user. Stop hooks can either be a set of lldb command-line commands, or can +be implemented by a suitably defined Python class. 
The Python-based stop-hooks
+can also be passed as a set of -key -value pairs when they are added, and those
+will get packaged up into a `SBStructuredData` Dictionary and passed to the
+constructor of the Python object managing the stop hook. This allows for
+parameterization of the stop hooks.
+
+To add a Python-based stop hook, first define a class with the following methods:
+
+| Name | Arguments | Description |
+|------|-----------|-------------|
+| `__init__` | `target: lldb.SBTarget` `extra_args: lldb.SBStructuredData` | This is the constructor for the new stop-hook. `target` is the SBTarget to which the stop hook is added. `extra_args` is an SBStructuredData object that the user can pass in when creating instances of this breakpoint. It is not required, but allows for reuse of stop-hook classes. |
+| `handle_stop` | `exe_ctx: lldb.SBExecutionContext` `stream: lldb.SBStream` | This is called when the target stops. The `exe_ctx` argument will be filled with the current stop point for which the stop hook is being evaluated. `stream` is an lldb.SBStream; anything written to this stream will be written to the debugger console. The return value is a "Should Stop" vote from this thread. If the method returns True, or returns nothing, this thread votes to stop. If it returns False, then the thread votes to continue after all the stop-hooks are evaluated. Note, the --auto-continue flag to 'target stop-hook add' overrides a True return value from the method. |
+
+To use this class in lldb, run the command:
+
+```
+(lldb) command script import MyModule.py
+(lldb) target stop-hook add -P MyModule.MyStopHook -k first -v 1 -k second -v 2
+```
+
+where `MyModule.py` is the file containing the class definition `MyStopHook`.
\ No newline at end of file diff --git a/lldb/docs/use/tutorials/implementing-standalone-scripts.md b/lldb/docs/use/tutorials/implementing-standalone-scripts.md new file mode 100644 index 0000000000000..b8aaacf22fc2e --- /dev/null +++ b/lldb/docs/use/tutorials/implementing-standalone-scripts.md @@ -0,0 +1,134 @@ +# Implementing Standalone Scripts + +### Configuring `PYTHONPATH` + +LLDB has all of its core code built into a shared library which gets used by +the `lldb` command line application. +- On macOS this shared library is a framework: `LLDB.framework`. +- On other unix variants the program is a shared library: lldb.so. + +LLDB also provides an `lldb.py` module that contains the bindings from LLDB +into Python. To use the `LLDB.framework` to create your own stand-alone python +programs, you will need to tell python where to look in order to find this +module. This is done by setting the `PYTHONPATH` environment variable, +adding a path to the directory that contains the `lldb.py` python +module. The lldb driver program has an option to report the path to the lldb +module. You can use that to point to correct lldb.py: + +For csh and tcsh: + +```csh +% setenv PYTHONPATH `lldb -P` +``` + +For sh and bash: + +```bash +$ export PYTHONPATH=`lldb -P` +``` + +Alternatively, you can append the LLDB Python directory to the sys.path list +directly in your Python code before importing the lldb module. + +### Initialization + +The standard test for `__main__`, like many python modules do, is useful for +creating scripts that can be run from the command line. However, for command +line scripts, the debugger instance must be created manually. Sample code would +look like: + +```python3 +if __name__ == '__main__': + # Initialize the debugger before making any API calls. + lldb.SBDebugger.Initialize() + # Create a new debugger instance in your module if your module + # can be run from the command line. 
When we run a script from + # the command line, we won't have any debugger object in + # lldb.debugger, so we can just create it if it will be needed + debugger = lldb.SBDebugger.Create() + + # Next, do whatever work this module should do when run as a command. + # ... + + # Finally, dispose of the debugger you just made. + lldb.SBDebugger.Destroy(debugger) + # Terminate the debug session + lldb.SBDebugger.Terminate() +``` + +### Example + +Now your python scripts are ready to import the lldb module. Below is a python +script that will launch a program from the current working directory called +`a.out`, set a breakpoint at `main`, and then run and hit the breakpoint, and +print the process, thread and frame objects if the process stopped: + +```python3 +#!/usr/bin/env python3 + +import lldb +import os + +def disassemble_instructions(insts): + for i in insts: + print(i) + +# Set the path to the executable to debug +exe = "./a.out" + +# Create a new debugger instance +debugger = lldb.SBDebugger.Create() + +# When we step or continue, don't return from the function until the process +# stops. Otherwise we would have to handle the process events ourselves which, while doable is +# a little tricky. We do this by setting the async mode to false. +debugger.SetAsync(False) + +# Create a target from a file and arch +print("Creating a target for '%s'" % exe) + +target = debugger.CreateTargetWithFileAndArch(exe, lldb.LLDB_ARCH_DEFAULT) + +if target: + # If the target is valid set a breakpoint at main + main_bp = target.BreakpointCreateByName( + "main", target.GetExecutable().GetFilename() + ) + + print(main_bp) + + # Launch the process. 
Since we specified synchronous mode, we won't return + # from this function until we hit the breakpoint at main + process = target.LaunchSimple(None, None, os.getcwd()) + + # Make sure the launch went ok + if process: + # Print some simple process info + state = process.GetState() + print(process) + if state == lldb.eStateStopped: + # Get the first thread + thread = process.GetThreadAtIndex(0) + if thread: + # Print some simple thread info + print(thread) + # Get the first frame + frame = thread.GetFrameAtIndex(0) + if frame: + # Print some simple frame info + print(frame) + function = frame.GetFunction() + # See if we have debug info (a function) + if function: + # We do have a function, print some info for the function + print(function) + # Now get all instructions for this function and print them + insts = function.GetInstructions(target) + disassemble_instructions(insts) + else: + # See if we have a symbol in the symbol table for where we stopped + symbol = frame.GetSymbol() + if symbol: + # We do have a symbol, print some info for the symbol + print(symbol) +``` \ No newline at end of file diff --git a/lldb/docs/use/tutorials/python-embedded-interpreter.md b/lldb/docs/use/tutorials/python-embedded-interpreter.md new file mode 100644 index 0000000000000..719d746b35d43 --- /dev/null +++ b/lldb/docs/use/tutorials/python-embedded-interpreter.md @@ -0,0 +1,66 @@ +# Embedded Python Interpreter + +The embedded python interpreter can be accessed in a variety of ways from +within LLDB. The easiest way is to use the lldb command script with no +arguments at the lldb command prompt: + +```python3 +(lldb) script +Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. +>>> 2+3 +5 +>>> hex(12345) +'0x3039' +>>> +``` + +This drops you into the embedded python interpreter. 
When running under the +script command, lldb sets some convenience variables that give you quick access +to the currently selected entities that characterize the program and debugger +state. In each case, if there is no currently selected entity of the +appropriate type, the variable's IsValid method will return false. These +variables are: + +| Variable | Type | Equivalent | Description | +|----------|------|------------|-------------| +| `lldb.debugger` | `lldb.SBDebugger` | `SBTarget.GetDebugger` | Contains the debugger object whose `script` command was invoked. The `lldb.SBDebugger` object owns the command interpreter and all the targets in your debug session. There will always be a Debugger in the embedded interpreter. | +| `lldb.target` | `lldb.SBTarget` | `SBDebugger.GetSelectedTarget` `SBProcess.GetTarget` | Contains the currently selected target - for instance the one made with the `file` or selected by the `target select ` command. The `lldb.SBTarget` manages one running process, and all the executable and debug files for the process. | +| `lldb.process` | `lldb.SBProcess` | `SBTarget.GetProcess` `SBThread.GetProcess` | Contains the process of the currently selected target. The `lldb.SBProcess` object manages the threads and allows access to memory for the process. | +| `lldb.thread` | `lldb.SBThread` | `SBProcess.GetSelectedThread` `SBFrame.GetThread` | Contains the currently selected thread. The `lldb.SBThread` object manages the stack frames in that thread. A thread is always selected in the command interpreter when a target stops. The `thread select ` command can be used to change the currently selected thread. So as long as you have a stopped process, there will be some selected thread. | +| `lldb.frame` | `lldb.SBFrame` | `SBThread.GetSelectedFrame` | Contains the currently selected stack frame. The `lldb.SBFrame` object manage the stack locals and the register set for that stack. 
A stack frame is always selected in the command interpreter when a target stops. The `frame select ` command can be used to change the currently selected frame. So as long as you have a stopped process, there will be some selected frame. | + +While extremely convenient, these variables have a couple caveats that you +should be aware of. First of all, they hold the values of the selected objects +on entry to the embedded interpreter. They do not update as you use the LLDB +API's to change, for example, the currently selected stack frame or thread. + +Moreover, they are only defined and meaningful while in the interactive Python +interpreter. There is no guarantee on their value in any other situation, hence +you should not use them when defining Python formatters, breakpoint scripts and +commands (or any other Python extension point that LLDB provides). For the +latter you'll be passed an `SBDebugger`, `SBTarget`, `SBProcess`, `SBThread` or +`SBFrame` instance and you can use the functions from the "Equivalent" column +to navigate between them. + +As a rationale for such behavior, consider that lldb can run in a multithreaded +environment, and another thread might call the "script" command, changing the +value out from under you. + +To get started with these objects and LLDB scripting, please note that almost +all of the lldb Python objects are able to briefly describe themselves when you +pass them to the Python print function: + +```python3 +(lldb) script +Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. 
+>>> print(lldb.debugger) +Debugger (instance: "debugger_1", id: 1) +>>> print(lldb.target) +a.out +>>> print(lldb.process) +SBProcess: pid = 58842, state = stopped, threads = 1, executable = a.out +>>> print(lldb.thread) +thread #1: tid = 0x2265ce3, 0x0000000100000334 a.out`main at t.c:2:3, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1 +>>> print(lldb.frame) +frame #0: 0x0000000100000334 a.out`main at t.c:2:3 +``` \ No newline at end of file diff --git a/lldb/docs/use/tutorials/script-driven-debugging.md b/lldb/docs/use/tutorials/script-driven-debugging.md new file mode 100644 index 0000000000000..55b90b1e25bf5 --- /dev/null +++ b/lldb/docs/use/tutorials/script-driven-debugging.md @@ -0,0 +1,492 @@ +# Script-Driven Debugging + +LLDB has been structured from the beginning to be scriptable in two +ways: +- a Unix Python session can initiate/run a debug session non-interactively +using LLDB; +- and within the LLDB debugger tool, Python scripts can be used to help with +many tasks, including inspecting program data, iterating over containers and +determining if a breakpoint should stop execution or continue. + +This document will show how to do some of these things by going through an +example, explaining how to use Python scripting to find a bug in a program +that searches for text in a large binary tree. + +### The Test Program and Input + +We have a simple C program ([dictionary.c](https://github.com/llvm/llvm-project/blob/main/lldb/examples/scripting/dictionary.c)) +that reads in a text file, and stores all the words from the file in a +Binary Search Tree, sorted alphabetically. It then enters a loop +prompting the user for a word, searching for the word in the tree +(using Binary Search), and reporting to the user whether or not it found +the word in the tree. + +The input text file we are using to test our program contains the text +for William Shakespeare's famous tragedy "Romeo and Juliet". 
+ +### The Bug + +When we try running our program, we find there is a problem. While it +successfully finds some of the words we would expect to find, such as +"love" or "sun", it fails to find the word "Romeo", which **MUST** be in +the input text file: + +```shell +$ ./dictionary Romeo-and-Juliet.txt +Dictionary loaded. +Enter search word: love +Yes! +Enter search word: sun +Yes! +Enter search word: Romeo +No! +Enter search word: ^D +$ +``` + +### Using Depth First Search + +Our first job is to determine if the word "Romeo" actually got inserted +into the tree or not. Since "Romeo and Juliet" has thousands of words, +trying to examine our binary search tree by hand is completely +impractical. Therefore we will write a Python script to search the tree +for us. We will write a recursive Depth First Search function that +traverses the entire tree searching for a word, and maintaining +information about the path from the root of the tree to the current +node. If it finds the word in the tree, it returns the path from the +root to the node containing the word. 
This is what our DFS function in +Python would look like, with line numbers added for easy reference in +later explanations: + +```python3 +1: def DFS (root, word, cur_path): +2: root_word_ptr = root.GetChildMemberWithName ("word") +3: left_child_ptr = root.GetChildMemberWithName ("left") +4: right_child_ptr = root.GetChildMemberWithName ("right") +5: root_word = root_word_ptr.GetSummary() +6: end = len (root_word) - 1 +7: if root_word[0] == '"' and root_word[end] == '"': +8: root_word = root_word[1:end] +9: end = len (root_word) - 1 +10: if root_word[0] == '\'' and root_word[end] == '\'': +11: root_word = root_word[1:end] +12: if root_word == word: +13: return cur_path +14: elif word < root_word: +15: if left_child_ptr.GetValue() is None: +16: return "" +17: else: +18: cur_path = cur_path + "L" +19: return DFS (left_child_ptr, word, cur_path) +20: else: +21: if right_child_ptr.GetValue() is None: +22: return "" +23: else: +24: cur_path = cur_path + "R" +25: return DFS (right_child_ptr, word, cur_path) +``` + +### Accessing & Manipulating Program Variables + +Before we can call any Python function on any of our program's +variables, we need to get the variable into a form that Python can +access. To show you how to do this we will look at the parameters for +the DFS function. The first parameter is going to be a node in our +binary search tree, put into a Python variable. The second parameter is +the word we are searching for (a string), and the third parameter is a +string representing the path from the root of the tree to our current +node. + +The most interesting parameter is the first one, the Python variable +that needs to contain a node in our search tree. How can we take a +variable out of our program and put it into a Python variable? What +kind of Python variable will it be? The answers are to use the LLDB API +functions, provided as part of the LLDB Python module. 
Running Python +from inside LLDB, LLDB will automatically give us our current frame +object as a Python variable, "lldb.frame". This variable has the type +`SBFrame` (see the LLDB API for more information about `SBFrame` +objects). One of the things we can do with a frame object, is to ask it +to find and return its local variable. We will call the API function +`SBFrame.FindVariable` on the `lldb.frame` object to give us our +dictionary variable as a Python variable: + +```python3 +root = lldb.frame.FindVariable ("dictionary") +``` + +The line above, executed in the Python script interpreter in LLDB, asks the +current frame to find the variable named "dictionary" and return it. We then +store the returned value in the Python variable named "root". This answers the +question of HOW to get the variable, but it still doesn't explain WHAT actually +gets put into "root". If you examine the LLDB API, you will find that the +`SBFrame` method "FindVariable" returns an object of type `SBValue`. `SBValue` +objects are used, among other things, to wrap up program variables and values. +There are many useful methods defined in the `SBValue` class to allow you to get +information or children values out of SBValues. For complete information, see +the header file SBValue.h. The `SBValue` methods that we use in our DFS function +are `GetChildMemberWithName()`, `GetSummary()`, and `GetValue()`. + +### Explaining DFS Script in Detail + +Before diving into the details of this code, it would be best to give a +high-level overview of what it does. The nodes in our binary search tree were +defined to have type `tree_node *`, which is defined as: + +```c++ +typedef struct tree_node +{ + const char *word; + struct tree_node *left; + struct tree_node *right; +} tree_node; +``` + +Lines 2-11 of DFS are getting data out of the current tree node and getting +ready to do the actual search; lines 12-25 are the actual depth-first search. 
+Lines 2-4 of our DFS function get the word, left and right fields out of the +current node and store them in Python variables. Since root_word_ptr is a +pointer to our word, and we want the actual word, line 5 calls GetSummary() to +get a string containing the value out of the pointer. Since GetSummary() adds +quotes around its result, lines 6-11 strip surrounding quotes off the word. + +Line 12 checks to see if the word in the current node is the one we are +searching for. If so, we are done, and line 13 returns the current path. +Otherwise, line 14 checks to see if we should go left (search word comes before +the current word). If we decide to go left, line 15 checks to see if the left +pointer child is NULL ("None" is the Python equivalent of NULL). If the left +pointer is NULL, then the word is not in this tree and we return an empty path +(line 16). Otherwise, we add an "L" to the end of our current path string, to +indicate we are going left (line 18), and then recurse on the left child (line +19). Lines 20-25 are the same as lines 14-19, except for going right rather +than going left. + +One other note: Typing something as long as our DFS function directly into the +interpreter can be difficult, as making a single typing mistake means having to +start all over. Therefore we recommend doing as we have done: Writing your +longer, more complicated script functions in a separate file (in this case +tree_utils.py) and then importing it into your LLDB Python interpreter. + +### The DFS Script in Action + +At this point we are ready to use the DFS function to see if the word "Romeo" +is in our tree or not. To actually use it in LLDB on our dictionary program, +you would do something like this: + +```c++ +$ lldb +(lldb) process attach -n "dictionary" +Architecture set to: x86_64. 
+Process 521 stopped +* thread #1: tid = 0x2c03, 0x00007fff86c8bea0 libSystem.B.dylib`read$NOCANCEL + 8, stop reason = signal SIGSTOP +frame #0: 0x00007fff86c8bea0 libSystem.B.dylib`read$NOCANCEL + 8 +(lldb) breakpoint set -n find_word +Breakpoint created: 1: name = 'find_word', locations = 1, resolved = 1 +(lldb) continue +Process 521 resuming +Process 521 stopped +* thread #1: tid = 0x2c03, 0x0000000100001830 dictionary`find_word + 16 +at dictionary.c:105, stop reason = breakpoint 1.1 +frame #0: 0x0000000100001830 dictionary`find_word + 16 at dictionary.c:105 +102 int +103 find_word (tree_node *dictionary, char *word) +104 { +-> 105 if (!word || !dictionary) +106 return 0; +107 +108 int compare_value = strcmp (word, dictionary->word); +(lldb) script +``` +```python3 +Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. +>>> import tree_utils +>>> root = lldb.frame.FindVariable ("dictionary") +>>> current_path = "" +>>> path = tree_utils.DFS (root, "Romeo", current_path) +>>> print path +LLRRL +>>> ^D +(lldb) +``` + +The first bit of code above shows starting lldb, attaching to the dictionary +program, and getting to the find_word function in LLDB. The interesting part +(as far as this example is concerned) begins when we enter the script command +and drop into the embedded interactive Python interpreter. We will go over this +Python code line by line. The first line + +```python3 +import tree_utils +``` + +imports the file where we wrote our DFS function, tree_utils.py, into Python. +Notice that to import the file we leave off the ".py" extension. We can now +call any function in that file, giving it the prefix "tree_utils.", so that +Python knows where to look for the function. The line + +```python3 +root = lldb.frame.FindVariable ("dictionary") +``` + +gets our program variable "dictionary" (which contains the binary search tree) +and puts it into the Python variable "root". 
See Accessing & Manipulating +Program Variables in Python above for more details about how this works. The +next line is + +```python3 +current_path = "" +``` + +This line initializes the current_path from the root of the tree to our current +node. Since we are starting at the root of the tree, our current path starts as +an empty string. As we go right and left through the tree, the DFS function +will append an 'R' or an 'L' to the current path, as appropriate. The line + +```python3 +path = tree_utils.DFS (root, "Romeo", current_path) +``` + +calls our DFS function (prefixing it with the module name so that Python can +find it). We pass in our binary tree stored in the variable root, the word we +are searching for, and our current path. We assign whatever path the DFS +function returns to the Python variable path. + +Finally, we want to see if the word was found or not, and if so we want to see +the path through the tree to the word. So we do + +```python3 +print path +``` + +From this we can see that the word "Romeo" was indeed found in the tree, and +the path from the root of the tree to the node containing "Romeo" is +left-left-right-right-left. + +### Using Breakpoint Command Scripts + +We are halfway to figuring out what the problem is. We know the word we are +looking for is in the binary tree, and we know exactly where it is in the +binary tree. Now we need to figure out why our binary search algorithm is not +finding the word. We will do this using breakpoint command scripts. + +The idea is as follows. The binary search algorithm has two main decision +points: the decision to follow the right branch; and, the decision to follow +the left branch. We will set a breakpoint at each of these decision points, and +attach a Python breakpoint command script to each breakpoint. The breakpoint +commands will use the global path Python variable that we got from our DFS +function. 
Each time one of these decision breakpoints is hit, the script will +compare the actual decision with the decision the front of the path variable +says should be made (the first character of the path). If the actual decision +and the path agree, then the front character is stripped off the path, and +execution is resumed. In this case the user never even sees the breakpoint +being hit. But if the decision differs from what the path says it should be, +then the script prints out a message and does NOT resume execution, leaving the +user sitting at the first point where a wrong decision is being made. + +### Python Breakpoint Command Scripts Are Not What They Seem + +What do we mean by that? When you enter a Python breakpoint command in LLDB, it +appears that you are entering one or more plain lines of Python. BUT LLDB then +takes what you entered and wraps it into a Python FUNCTION (just like using the +"def" Python command). It automatically gives the function an obscure, unique, +hard-to-stumble-across function name, and gives it two parameters: frame and +bp_loc. When the breakpoint gets hit, LLDB wraps up the frame object where the +breakpoint was hit, and the breakpoint location object for the breakpoint that +was hit, and puts them into Python variables for you. It then calls the Python +function that was created for the breakpoint command, and passes in the frame +and breakpoint location objects. + +So, being practical, what does this mean for you when you write your Python +breakpoint commands? It means that there are two things you need to keep in +mind: 1. If you want to access any Python variables created outside your +script, you must declare such variables to be global. If you do not declare +them as global, then the Python function will treat them as local variables, +and you will get unexpected behavior. 2. All Python breakpoint command scripts +automatically have a frame and a bp_loc variable. 
The variables are pre-loaded +by LLDB with the correct context for the breakpoint. You do not have to use +these variables, but they are there if you want them. + +### The Decision Point Breakpoint Commands + +This is what the Python breakpoint command script would look like for the +decision to go right: + +```python3 +global path +if path[0] == 'R': + path = path[1:] + thread = frame.GetThread() + process = thread.GetProcess() + process.Continue() +else: + print "Here is the problem; going right, should go left!" +``` + +Just as a reminder, LLDB is going to take this script and wrap it up in a function, like this: + +```python3 +def some_unique_and_obscure_function_name (frame, bp_loc): + global path + if path[0] == 'R': + path = path[1:] + thread = frame.GetThread() + process = thread.GetProcess() + process.Continue() + else: + print "Here is the problem; going right, should go left!" +``` + +LLDB will call the function, passing in the correct frame and breakpoint +location whenever the breakpoint gets hit. There are several things to notice +about this function. The first one is that we are accessing and updating a +piece of state (the path variable), and actually conditioning our behavior +based upon this variable. Since the variable was defined outside of our script +(and therefore outside of the corresponding function) we need to tell Python +that we are accessing a global variable. That is what the first line of the +script does. Next we check where the path says we should go and compare it to +our decision (recall that we are at the breakpoint for the decision to go +right). If the path agrees with our decision, then we strip the first character +off of the path. + +Since the decision matched the path, we want to resume execution. To do this we +make use of the frame parameter that LLDB guarantees will be there for us. We +use LLDB API functions to get the current thread from the current frame, and +then to get the process from the thread. 
Once we have the process, we tell it +to resume execution (using the Continue() API function). + +If the decision to go right does not agree with the path, then we do not resume +execution. We allow the breakpoint to remain stopped (by doing nothing), and we +print an informational message telling the user we have found the problem, and +what the problem is. + +### Actually Using The Breakpoint Commands + +Now we will look at what happens when we actually use these breakpoint commands +on our program. Doing a source list -n find_word shows us the function +containing our two decision points. Looking at the code below, we see that we +want to set our breakpoints on lines 113 and 115: + +```c++ +(lldb) source list -n find_word +File: /Volumes/Data/HD2/carolinetice/Desktop/LLDB-Web-Examples/dictionary.c. +101 +102 int +103 find_word (tree_node *dictionary, char *word) +104 { +105 if (!word || !dictionary) +106 return 0; +107 +108 int compare_value = strcmp (word, dictionary->word); +109 +110 if (compare_value == 0) +111 return 1; +112 else if (compare_value < 0) +113 return find_word (dictionary->left, word); +114 else +115 return find_word (dictionary->right, word); +116 } +117 +``` + +So, we set our breakpoints, enter our breakpoint command scripts, and see what happens: + +```c++ +(lldb) breakpoint set -l 113 +Breakpoint created: 2: file ="dictionary.c", line = 113, locations = 1, resolved = 1 +(lldb) breakpoint set -l 115 +Breakpoint created: 3: file ="dictionary.c", line = 115, locations = 1, resolved = 1 +(lldb) breakpoint command add -s python 2 +``` +```python3 +Enter your Python command(s). Type 'DONE' to end. +> global path +> if (path[0] == 'L'): +> path = path[1:] +> thread = frame.GetThread() +> process = thread.GetProcess() +> process.Continue() +> else: +> print "Here is the problem. Going left, should go right!" +> DONE +``` +```c++ +(lldb) breakpoint command add -s python 3 +``` +```python3 +Enter your Python command(s). Type 'DONE' to end. 
+> global path +> if (path[0] == 'R'): +> path = path[1:] +> thread = frame.GetThread() +> process = thread.GetProcess() +> process.Continue() +> else: +> print "Here is the problem. Going right, should go left!" +> DONE +``` +```c++ +(lldb) continue +Process 696 resuming +Here is the problem. Going right, should go left! +Process 696 stopped +* thread #1: tid = 0x2d03, 0x000000010000189f dictionary`find_word + 127 at dictionary.c:115, stop reason = breakpoint 3.1 +frame #0: 0x000000010000189f dictionary`find_word + 127 at dictionary.c:115 + 112 else if (compare_value < 0) + 113 return find_word (dictionary->left, word); + 114 else +-> 115 return find_word (dictionary->right, word); + 116 } + 117 + 118 void +(lldb) +``` + +After setting our breakpoints, adding our breakpoint commands and continuing, +we run for a little bit and then hit one of our breakpoints, printing out the +error message from the breakpoint command. Apparently at this point in the +tree, our search algorithm decided to go right, but our path says the node we +want is to the left. Examining the word at the node where we stopped, and our +search word, we see: + +```c++ +(lldb) expr dictionary->word +(const char *) $1 = 0x0000000100100080 "dramatis" +(lldb) expr word +(char *) $2 = 0x00007fff5fbff108 "romeo" +``` + +So the word at our current node is "dramatis", and the word we are searching +for is "romeo". "romeo" comes after "dramatis" alphabetically, so it seems like +going right would be the correct decision. Let's ask Python what it thinks the +path from the current node to our word is: + +```c++ +(lldb) script print path +LLRRL +``` + +According to Python we need to go left-left-right-right-left from our current +node to find the word we are looking for. 
Let's double check our tree, and see +what word it has at that node: + +```c++ +(lldb) expr dictionary->left->left->right->right->left->word +(const char *) $4 = 0x0000000100100880 "Romeo" +``` + +So the word we are searching for is "romeo" and the word at our DFS location is +"Romeo". Aha! One is uppercase and the other is lowercase: We seem to have a +case conversion problem somewhere in our program (we do). + +This is the end of our example on how you might use Python scripting in LLDB to +help you find bugs in your program. + +### Sources + +The complete code for the Dictionary program (with case-conversion bug), the +DFS function and other Python script examples used for this example are +available below. + +- [tree_utils.py](https://github.com/llvm/llvm-project/blob/main/lldb/examples/scripting/tree_utils.py) - Example Python functions using LLDB's API, including DFS +- [dictionary.c](https://github.com/llvm/llvm-project/blob/main/lldb/examples/scripting/dictionary.c) - Sample dictionary program, with bug +- The text for "Romeo and Juliet" can be obtained from [the Gutenberg Project](https://www.gutenberg.org). + diff --git a/lldb/docs/use/tutorials/writing-custom-commands.md b/lldb/docs/use/tutorials/writing-custom-commands.md new file mode 100644 index 0000000000000..d53b7e473a505 --- /dev/null +++ b/lldb/docs/use/tutorials/writing-custom-commands.md @@ -0,0 +1,429 @@ +# Writing Custom Commands + +### Create a new command using a Python function + +Python functions can be used to create new LLDB command interpreter commands, +which will work like all the natively defined lldb commands. This provides a +very flexible and easy way to extend LLDB to meet your debugging requirements. 
+ +To write a python function that implements a new LLDB command define the +function to take five arguments as follows: + +```python3 +def command_function(debugger, command, exe_ctx, result, internal_dict): + # Your code goes here +``` + +The meaning of the arguments is given in the table below. + +If you provide a Python docstring in your command function LLDB will use it +when providing "long help" for your command, as in: + +```python3 +def command_function(debugger, command, result, internal_dict): + """This command takes a lot of options and does many fancy things""" + # Your code goes here +``` + +though providing help can also be done programmatically (see below). + +Prior to lldb 3.5.2 (April 2015), LLDB Python command definitions didn't take the SBExecutionContext +argument. So you may still see commands where the command definition is: + +```python3 +def command_function(debugger, command, result, internal_dict): + # Your code goes here +``` + +Using this form is strongly discouraged because it can only operate on the "currently selected" +target, process, thread, frame. The command will behave as expected when run +directly on the command line. But if the command is used in a stop-hook, breakpoint +callback, etc. where the response to the callback determines whether we will select +this or that particular process/frame/thread, the global "currently selected" +entity is not necessarily the one the callback is meant to handle. In that case, this +command definition form can't do the right thing. + +| Argument | Type | Description | +|----------|------|-------------| +| `debugger` | `lldb.SBDebugger` | The current debugger object. | +| `command` | `python string` | A python string containing all arguments for your command. If you need to chop up the arguments try using the `shlex` module's `shlex.split(command)` to properly extract the arguments. 
| +| `exe_ctx` | `lldb.SBExecutionContext` | An execution context object carrying around information on the inferior process' context in which the command is expected to act *Optional since lldb 3.5.2, unavailable before* | +| `result` | `lldb.SBCommandReturnObject` | A return object which encapsulates success/failure information for the command and output text that needs to be printed as a result of the command. The plain Python "print" command also works but text won't go in the result by default (it is useful as a temporary logging facility). | +| `internal_dict` | `python dict object` | The dictionary for the current embedded script session which contains all variables and functions. | + +### Create a new command using a Python class + +Since lldb 3.7, Python commands can also be implemented by means of a class +which should implement the following interface: + +```python3 +class CommandObjectType: + def __init__(self, debugger, internal_dict): + # this call should initialize the command with respect to the command interpreter for the passed-in debugger + + def __call__(self, debugger, command, exe_ctx, result): + # this is the actual bulk of the command, akin to Python command functions + + def get_short_help(self): + # this call should return the short help text for this command[1] + + def get_long_help(self): + # this call should return the long help text for this command[1] + + def get_flags(self): + # this will be called when the command is added to the command interpreter, + # and should return a flag field made from or-ing together the appropriate + # elements of the lldb.CommandFlags enum to specify the requirements of this command. + # The CommandInterpreter will make sure all these requirements are met, and will + # return the standard lldb error if they are not.[1] + + def get_repeat_command(self, command): + # The auto-repeat command is what will get executed when the user types just + # a return at the next prompt after this command is run. 
Even if your command + # was run because it was specified as a repeat command, that invocation will still + # get asked for ITS repeat command, so you can chain a series of repeats, for instance + # to implement a pager. + + # The command argument is the command that is about to be executed. + + # If this call returns None, then the ordinary repeat mechanism will be used + # If this call returns an empty string, then auto-repeat is disabled + # If this call returns any other string, that will be the repeat command [1] +``` + +[1] This method is optional. + +As a convenience, you can treat the result object as a Python file object, and +say + +```python3 +print("my command does lots of cool stuff", file=result) +``` + +`SBCommandReturnObject` and `SBStream` both support this file-like behavior by +providing `write()` and `flush()` calls at the Python layer. + +### Parsed Commands + +The commands that are added using this class definition are what lldb calls +"raw" commands. The command interpreter doesn't attempt to parse the command, +doesn't handle option values, generate help for them, or handle their +completion. Raw commands are useful when the arguments passed to the command +are unstructured, and having to protect them against lldb command parsing would +be onerous. For instance, "expr" is a raw command. + +You can also add scripted commands that implement the "parsed command", where +the options and their types are specified, as well as the argument and argument +types. These commands look and act like the majority of lldb commands, and you +can also add custom completions for the options and/or the arguments if you have +special needs. + +The easiest way to do this is to derive your new command from the lldb.ParsedCommand +class. That responds in the same way to the help & repeat command interfaces, and +provides some convenience methods, and most importantly an LLDBOptionValueParser, +accessed through lldb.ParsedCommand.get_parser(). 
The parser is used to set +your command definitions, and to retrieve option values in the `__call__` method. + +To set up the command definition, implement the ParsedCommand abstract method: + +```python3 +def setup_command_definition(self): +``` + +This is called when your command is added to lldb. In this method you add the +options and their types, the option help strings, etc. to the command using the API: + +```python3 +def add_option(self, short_option, long_option, help, default, + dest = None, required=False, groups = None, + value_type=lldb.eArgTypeNone, completion_type=None, + enum_values=None): + """ + short_option: one character, must be unique, not required + long_option: no spaces, must be unique, required + help: a usage string for this option, will print in the command help + default: the initial value for this option (if it has a value) + dest: the name of the property that gives you access to the value for + this option. Defaults to the long option if not provided. + required: if true, this option must be provided or the command will error out + groups: Which "option groups" does this option belong to. This can either be + a simple list (e.g. [1, 3, 4, 5]) or you can specify ranges by sublists: + so [1, [3,5]] is the same as [1, 3, 4, 5]. + value_type: one of the lldb.eArgType enum values. Some of the common arg + types also have default completers, which will be applied automatically. + completion_type: currently these are values from the lldb.CompletionType enum. If + you need custom completions, implement handle_option_argument_completion. + enum_values: An array of duples: ["element_name", "element_help"]. If provided, + only one of the enum elements is allowed. The value will be the + element_name for the chosen enum element as a string. 
+ """ +``` + +Similarly, you can add argument types to the command: + +```python3 +def make_argument_element(self, arg_type, repeat = "optional", groups = None): + """ + arg_type: The argument type, one of the lldb.eArgType enum values. + repeat: Choose from the following options: + "plain" - one value + "optional" - zero or more values + "plus" - one or more values + groups: As with add_option. + """ +``` + +Then implement the body of the command by defining: + +```python3 +def __call__(self, debugger, args_array, exe_ctx, result): + """This is the command callback. The option values are + provided by the 'dest' properties on the parser. + + args_array: This is the list of arguments provided. + exe_ctx: Gives the SBExecutionContext on which the + command should operate. + result: Any results of the command should be + written into this SBCommandReturnObject. + """ +``` + +This differs from the "raw" command's `__call__` in that the arguments are already +parsed into the args_array, and the option values are set in the parser, and +can be accessed using their property name. The LLDBOptionValueParser class has +a couple of other handy methods: + +```python3 +def was_set(self, long_option_name): +``` + +returns `True` if the option was specified on the command line. + +```python +def dest_for_option(self, long_option_name): +""" +This will return the value of the dest variable you defined for opt_name. +Mostly useful for handle_completion where you get passed the long option. +""" +``` + +### Completion + +lldb will handle completing your option names, and all your enum values +automatically. If your option or argument types have associated built-in completers, +then lldb will also handle that completion for you. But if you have a need for +custom completions, either in your arguments or option values, you can handle +completion by hand as well. 
To handle completion of option value arguments, +your lldb.ParsedCommand subclass should implement: + +```python3 +def handle_option_argument_completion(self, long_option, cursor_pos): +""" +long_option: The long option name of the option whose value you are + asked to complete. +cursor_pos: The cursor position in the value for that option - which +you can get from the option parser. +""" +``` + +And to handle the completion of arguments: + +```python3 +def handle_argument_completion(self, args, arg_pos, cursor_pos): +""" +args: A list of the arguments to the command +arg_pos: An index into the args list of the argument with the cursor +cursor_pos: The cursor position in the arg specified by arg_pos +""" +``` + +When either of these APIs is called, the command line will have been parsed up to +the word containing the cursor, and any option values set in that part of the command +string are available from the option value parser. That's useful for instance +if you have a --shared-library option that would constrain the completions for, +say, a symbol name option or argument. + +The return value specifies what the completion options are. You have four +choices: + +- `True`: the completion was handled with no completions. + +- `False`: the completion was not handled, forward it to the regular +completion machinery. + +- A dictionary with the key: "completion": there is one candidate, +whose value is the value of the "completion" key. Optionally you can pass a +"mode" key whose value is either "partial" or "complete". Return partial if +the "completion" string is a prefix of all the completed values. + +For instance, if the string you are completing is "Test" and the available completions are: +"Test1", "Test11" and "Test111", you should return the dictionary: + +```python3 +return {"completion": "Test1", "mode" : "partial"} +``` + +and then lldb will add the "1" at the cursor and advance it after the added string, +waiting for more completions. 
But if "Test1" is the only completion, return: + +```python3 +{"completion": "Test1", "mode": "complete"} +``` + +and lldb will add "1 " at the cursor, indicating the command string is complete. + +The default is "complete", you don't need to specify a "mode" in that case. + +- A dictionary with the key: "values" whose value is a list of candidate completion +strings. The command interpreter will present those strings as the available choices. +You can optionally include a "descriptions" key, whose value is a parallel array +of description strings, and the completion will show the description next to +each completion. + +### Loading Commands + +One other handy convenience when defining lldb command-line commands is the +command "command script import" which will import a module specified by file +path, so you don't have to change your PYTHONPATH for temporary scripts. It +also has another convenience that if your new script module has a function of +the form: + +```python +def __lldb_init_module(debugger, internal_dict): + # Command Initialization code goes here +``` + +where debugger and internal_dict are as above, that function will get run when +the module is loaded allowing you to add whatever commands you want into the +current debugger. Note that this function will only be run when using the LLDB +command `command script import`, it will not get run if anyone imports your +module from another module. + +Another way to load custom commands in lldb is to use the +`@lldb.command(command_name=None, doc=None)` decorator. 
+ +```python3 +@lldb.command() +def goodstuff(debugger, command, ctx, result, internal_dict): + """command help string""" + # Command Implementation code goes here +``` + +### Examples + +Now we can create a module called ls.py in the file ~/ls.py that will implement +a function that can be used by LLDB's python command code: + +```python3 +#!/usr/bin/env python3 + +import lldb +import subprocess + +def ls(debugger, command, result, internal_dict): + output = subprocess.check_output(["/bin/ls"] + command.split(), text=True) + print(output, file=result) + +# And the initialization code to add your commands +def __lldb_init_module(debugger, internal_dict): + debugger.HandleCommand('command script add -f ls.ls ls') + print('The "ls" python command has been installed and is ready for use.') +``` + +Now we can load the module into LLDB and use it + +```shell +$ lldb +(lldb) command script import ~/ls.py +The "ls" python command has been installed and is ready for use. +(lldb) ls -l /tmp/ +total 365848 +-rw------- 1 someuser wheel 7331 Jan 19 15:37 crash.log +``` + +You can also make "container" commands to organize the commands you are adding to +lldb. Most of the lldb built-in commands structure themselves this way, and using +a tree structure has the benefit of leaving the one-word command space free for user +aliases. It can also make it easier to find commands if you are adding more than +a few of them. 
Here's a trivial example of adding two "utility" commands into a +"my-utilities" container: + +```python3 +#!/usr/bin/env python + +import lldb + +def first_utility(debugger, command, result, internal_dict): + print("I am the first utility") + +def second_utility(debugger, command, result, internal_dict): + print("I am the second utility") + +# And the initialization code to add your commands +def __lldb_init_module(debugger, internal_dict): + debugger.HandleCommand('command container add -h "A container for my utilities" my-utilities') + debugger.HandleCommand('command script add -f my_utilities.first_utility -h "My first utility" my-utilities first') + debugger.HandleCommand('command script add -f my_utilities.second_utility -h "My second utility" my-utilities second') + print('The "my-utilities" python command has been installed and its subcommands are ready for use.') +``` + +Then your new commands are available under the my-utilities node: + +``` +(lldb) help my-utilities +A container for my utilities + +Syntax: my-utilities + +The following subcommands are supported: + + first -- My first utility Expects 'raw' input (see 'help raw-input'.) + second -- My second utility Expects 'raw' input (see 'help raw-input'.) + +For more help on any particular subcommand, type 'help <command> <subcommand>'. +(lldb) my-utilities first +I am the first utility +``` + +A more interesting [template](https://github.com/llvm/llvm-project/blob/main/lldb/examples/python/cmdtemplate.py) +has been created in the source repository that can help you to create lldb commands quickly. + +A commonly required facility is being able to create a command that does some +token substitution, and then runs a different debugger command (usually, it +po'es the result of an expression evaluated on its argument). 
For instance, +given the following program: + +```objc +#import <Foundation/Foundation.h> +NSString* +ModifyString(NSString* src) +{ + return [src stringByAppendingString:@"foobar"]; +} + +int main() +{ + NSString* aString = @"Hello world"; + NSString* anotherString = @"Let's be friends"; + return 1; +} +``` + +you may want a `pofoo` X command, that equates po [ModifyString(X) +capitalizedString]. The following debugger interaction shows how to achieve +that goal: + +```python3 +(lldb) script +Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. +>>> def pofoo_funct(debugger, command, result, internal_dict): +... cmd = "po [ModifyString(" + command + ") capitalizedString]" +... debugger.HandleCommand(cmd) +... +>>> ^D +(lldb) command script add pofoo -f pofoo_funct +(lldb) pofoo aString +$1 = 0x000000010010aa00 Hello Worldfoobar +(lldb) pofoo anotherString +$2 = 0x000000010010aba0 Let's Be Friendsfoobar +``` \ No newline at end of file From 0c3cf200f5b918fb5c1114e9f1764c2d54d1779b Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 12 Sep 2025 21:48:41 -0700 Subject: [PATCH 211/734] [MemProf] Optionally allow transformation of nobuiltin operator new (#158396) For cases where we can guarantee the application does not override operator new. 
--- .../llvm/Transforms/Utils/SimplifyLibCalls.h | 2 +- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 43 +++++++++---- .../InstCombine/simplify-libcalls-new.ll | 60 ++++++++++++------- 3 files changed, 70 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index deb3d6c44ef09..4e7c97194cc59 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -189,7 +189,7 @@ class LibCallSimplifier { Value *optimizeMemSet(CallInst *CI, IRBuilderBase &B); Value *optimizeRealloc(CallInst *CI, IRBuilderBase &B); Value *optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc &Func); - Value *optimizeExistingHotColdNew(CallInst *CI, IRBuilderBase &B); + Value *maybeOptimizeNoBuiltinOperatorNew(CallInst *CI, IRBuilderBase &B); Value *optimizeWcslen(CallInst *CI, IRBuilderBase &B); Value *optimizeBCopy(CallInst *CI, IRBuilderBase &B); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8acebbaa5458b..4a1565977b91c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -61,6 +61,9 @@ static cl::opt OptimizeExistingHotColdNew( "optimize-existing-hot-cold-new", cl::Hidden, cl::init(false), cl::desc( "Enable optimization of existing hot/cold operator new library calls")); +static cl::opt OptimizeNoBuiltinHotColdNew( + "optimize-nobuiltin-hot-cold-new-new", cl::Hidden, cl::init(false), + cl::desc("Enable transformation of nobuiltin operator new library calls")); namespace { @@ -1723,13 +1726,11 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) { return nullptr; } -// Allow existing calls to operator new() that takes a __hot_cold_t parameter to -// be updated with a compiler-determined hot cold hint value. 
This is used in -// cases where the call is marked nobuiltin (because operator new called -// explicitly) and therefore cannot be replaced with a different callee. -Value *LibCallSimplifier::optimizeExistingHotColdNew(CallInst *CI, - IRBuilderBase &B) { - if (!OptimizeHotColdNew || !OptimizeExistingHotColdNew) +// Optionally allow optimization of nobuiltin calls to operator new and its +// variants. +Value *LibCallSimplifier::maybeOptimizeNoBuiltinOperatorNew(CallInst *CI, + IRBuilderBase &B) { + if (!OptimizeHotColdNew) return nullptr; Function *Callee = CI->getCalledFunction(); if (!Callee) @@ -1738,6 +1739,22 @@ Value *LibCallSimplifier::optimizeExistingHotColdNew(CallInst *CI, if (!TLI->getLibFunc(*Callee, Func)) return nullptr; switch (Func) { + case LibFunc_Znwm: + case LibFunc_ZnwmRKSt9nothrow_t: + case LibFunc_ZnwmSt11align_val_t: + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: + case LibFunc_Znam: + case LibFunc_ZnamRKSt9nothrow_t: + case LibFunc_ZnamSt11align_val_t: + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: + case LibFunc_size_returning_new: + case LibFunc_size_returning_new_aligned: + // By default normal operator new calls (not already passing a hot_cold_t + // parameter) are not mutated if the call is not marked builtin. Optionally + // enable that in cases where it is known to be safe. + if (!OptimizeNoBuiltinHotColdNew) + return nullptr; + break; case LibFunc_Znwm12__hot_cold_t: case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: case LibFunc_ZnwmSt11align_val_t12__hot_cold_t: @@ -1748,10 +1765,15 @@ Value *LibCallSimplifier::optimizeExistingHotColdNew(CallInst *CI, case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: case LibFunc_size_returning_new_hot_cold: case LibFunc_size_returning_new_aligned_hot_cold: - return optimizeNew(CI, B, Func); + // If the nobuiltin call already passes a hot_cold_t parameter, allow update + // of that parameter when enabled. 
+ if (!OptimizeExistingHotColdNew) + return nullptr; + break; default: return nullptr; } + return optimizeNew(CI, B, Func); } // When enabled, replace operator new() calls marked with a hot or cold memprof @@ -4121,9 +4143,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { // we can all non-FP calls with the StrictFP attribute to be // optimized. if (CI->isNoBuiltin()) { - // If this is an existing call to a hot cold operator new, we can update the - // hint parameter value, which doesn't change the callee. - return optimizeExistingHotColdNew(CI, Builder); + // Optionally update operator new calls. + return maybeOptimizeNoBuiltinOperatorNew(CI, Builder); } LibFunc Func; diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll index 41db7f929dfdf..5a4fb04f5f2c0 100644 --- a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll @@ -6,14 +6,18 @@ ; OFF-LABEL: @new_hot_cold() ;; First check with the default hint values (254 = -2, 128 = -128, 222 = -34). -; RUN: opt < %s -passes=instcombine -optimize-hot-cold-new -S | FileCheck %s --check-prefix=HOTCOLD -DCOLD=1 -DHOT=-2 -DNOTCOLD=-128 -DAMBIG=-34 -DPREVHINTCOLD=7 -DPREVHINTNOTCOLD=7 -DPREVHINTHOT=7 -DPREVHINTAMBIG=7 +; RUN: opt < %s -passes=instcombine -optimize-hot-cold-new -S | FileCheck %s --check-prefixes=HOTCOLD,NOBUILTIN-OFF -DCOLD=1 -DHOT=-2 -DNOTCOLD=-128 -DAMBIG=-34 -DPREVHINTCOLD=7 -DPREVHINTNOTCOLD=7 -DPREVHINTHOT=7 -DPREVHINTAMBIG=7 ;; Next check with the non-default cold and hot hint values (200 =-56). 
-; RUN: opt < %s -passes=instcombine -optimize-hot-cold-new -cold-new-hint-value=5 -hot-new-hint-value=200 -notcold-new-hint-value=99 -ambiguous-new-hint-value=44 -S | FileCheck %s --check-prefix=HOTCOLD -DCOLD=5 -DHOT=-56 -DAMBIG=44 -DNOTCOLD=99 -DPREVHINTCOLD=7 -DPREVHINTNOTCOLD=7 -DPREVHINTHOT=7 -DPREVHINTAMBIG=7 +; RUN: opt < %s -passes=instcombine -optimize-hot-cold-new -cold-new-hint-value=5 -hot-new-hint-value=200 -notcold-new-hint-value=99 -ambiguous-new-hint-value=44 -S | FileCheck %s --check-prefixes=HOTCOLD,NOBUILTIN-OFF -DCOLD=5 -DHOT=-56 -DAMBIG=44 -DNOTCOLD=99 -DPREVHINTCOLD=7 -DPREVHINTNOTCOLD=7 -DPREVHINTHOT=7 -DPREVHINTAMBIG=7 + +;; Next check with the same non-default cold and hot hint values (200 =-56), +;; but with transformation of nobuiltin calls enabled. +; RUN: opt < %s -passes=instcombine -optimize-hot-cold-new -optimize-nobuiltin-hot-cold-new-new -cold-new-hint-value=5 -hot-new-hint-value=200 -notcold-new-hint-value=99 -ambiguous-new-hint-value=44 -S | FileCheck %s --check-prefixes=HOTCOLD,NOBUILTIN-ON -DCOLD=5 -DHOT=-56 -DAMBIG=44 -DNOTCOLD=99 -DPREVHINTCOLD=7 -DPREVHINTNOTCOLD=7 -DPREVHINTHOT=7 -DPREVHINTAMBIG=7 ;; Try again with the non-default cold and hot hint values (200 =-56), and this ;; time specify that existing hints should be updated. 
-; RUN: opt < %s -passes=instcombine -optimize-hot-cold-new -cold-new-hint-value=5 -notcold-new-hint-value=100 -hot-new-hint-value=200 -ambiguous-new-hint-value=44 -optimize-existing-hot-cold-new -S | FileCheck %s --check-prefix=HOTCOLD -DCOLD=5 -DHOT=-56 -DNOTCOLD=100 -DAMBIG=44 -DPREVHINTCOLD=5 -DPREVHINTNOTCOLD=100 -DPREVHINTHOT=-56 -DPREVHINTAMBIG=44 +; RUN: opt < %s -passes=instcombine -optimize-hot-cold-new -cold-new-hint-value=5 -notcold-new-hint-value=100 -hot-new-hint-value=200 -ambiguous-new-hint-value=44 -optimize-existing-hot-cold-new -S | FileCheck %s --check-prefixes=HOTCOLD,NOBUILTIN-OFF -DCOLD=5 -DHOT=-56 -DNOTCOLD=100 -DAMBIG=44 -DPREVHINTCOLD=5 -DPREVHINTNOTCOLD=100 -DPREVHINTHOT=-56 -DPREVHINTAMBIG=44 ;; Make sure that values not in 0..255 are flagged with an error ; RUN: not opt < %s -passes=instcombine -optimize-hot-cold-new -cold-new-hint-value=256 -S 2>&1 | FileCheck %s --check-prefix=ERROR @@ -40,8 +44,9 @@ define void @new() { ; HOTCOLD: @_Znwm12__hot_cold_t(i64 10, i8 [[AMBIG]]) %call4 = call ptr @_Znwm(i64 10) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @_Znwm(i64 10) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @_Znwm(i64 10) + ; NOBUILTIN-ON: @_Znwm12__hot_cold_t(i64 10, i8 [[COLD]]) %call3 = call ptr @_Znwm(i64 10) #6 call void @dummy(ptr %call3) ret void @@ -68,8 +73,9 @@ define void @new_align() { ; HOTCOLD: @_ZnwmSt11align_val_t12__hot_cold_t(i64 10, i64 8, i8 [[AMBIG]]) %call4 = call ptr @_ZnwmSt11align_val_t(i64 10, i64 8) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @_ZnwmSt11align_val_t(i64 10, i64 8) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. 
+ ; NOBUILTIN-OFF: @_ZnwmSt11align_val_t(i64 10, i64 8) + ; NOBUILTIN-ON: @_ZnwmSt11align_val_t12__hot_cold_t(i64 10, i64 8, i8 [[COLD]]) %call3 = call ptr @_ZnwmSt11align_val_t(i64 10, i64 8) #6 call void @dummy(ptr %call3) ret void @@ -97,8 +103,9 @@ define void @new_nothrow() { ; HOTCOLD: @_ZnwmRKSt9nothrow_t12__hot_cold_t(i64 10, ptr nonnull %nt, i8 [[AMBIG]]) %call4 = call ptr @_ZnwmRKSt9nothrow_t(i64 10, ptr %nt) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @_ZnwmRKSt9nothrow_t(i64 10, ptr nonnull %nt) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @_ZnwmRKSt9nothrow_t(i64 10, ptr nonnull %nt) + ; NOBUILTIN-ON: @_ZnwmRKSt9nothrow_t12__hot_cold_t(i64 10, ptr nonnull %nt, i8 [[COLD]]) %call3 = call ptr @_ZnwmRKSt9nothrow_t(i64 10, ptr %nt) #6 call void @dummy(ptr %call3) ret void @@ -127,8 +134,9 @@ define void @new_align_nothrow() { ; HOTCOLD: @_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t(i64 10, i64 8, ptr nonnull %nt, i8 [[AMBIG]]) %call4 = call ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr %nt) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr nonnull %nt) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr nonnull %nt) + ; NOBUILTIN-ON: @_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t(i64 10, i64 8, ptr nonnull %nt, i8 [[COLD]]) %call3 = call ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr %nt) #6 call void @dummy(ptr %call3) ret void @@ -154,8 +162,9 @@ define void @array_new() { ; HOTCOLD: @_Znam12__hot_cold_t(i64 10, i8 [[AMBIG]]) %call4 = call ptr @_Znam(i64 10) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. 
- ; HOTCOLD: @_Znam(i64 10) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @_Znam(i64 10) + ; NOBUILTIN-ON: @_Znam12__hot_cold_t(i64 10, i8 [[COLD]]) %call3 = call ptr @_Znam(i64 10) #6 call void @dummy(ptr %call3) ret void @@ -182,8 +191,9 @@ define void @array_new_align() { ; HOTCOLD: @_ZnamSt11align_val_t12__hot_cold_t(i64 10, i64 8, i8 [[AMBIG]]) %call4 = call ptr @_ZnamSt11align_val_t(i64 10, i64 8) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @_ZnamSt11align_val_t(i64 10, i64 8) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @_ZnamSt11align_val_t(i64 10, i64 8) + ; NOBUILTIN-ON: @_ZnamSt11align_val_t12__hot_cold_t(i64 10, i64 8, i8 [[COLD]]) %call3 = call ptr @_ZnamSt11align_val_t(i64 10, i64 8) #6 call void @dummy(ptr %call3) ret void @@ -211,8 +221,9 @@ define void @array_new_nothrow() { ; HOTCOLD: @_ZnamRKSt9nothrow_t12__hot_cold_t(i64 10, ptr nonnull %nt, i8 [[AMBIG]]) %call4 = call ptr @_ZnamRKSt9nothrow_t(i64 10, ptr %nt) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @_ZnamRKSt9nothrow_t(i64 10, ptr nonnull %nt) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @_ZnamRKSt9nothrow_t(i64 10, ptr nonnull %nt) + ; NOBUILTIN-ON: @_ZnamRKSt9nothrow_t12__hot_cold_t(i64 10, ptr nonnull %nt, i8 [[COLD]]) %call3 = call ptr @_ZnamRKSt9nothrow_t(i64 10, ptr %nt) #6 call void @dummy(ptr %call3) ret void @@ -241,8 +252,9 @@ define void @array_new_align_nothrow() { ; HOTCOLD: @_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t(i64 10, i64 8, ptr nonnull %nt, i8 [[AMBIG]]) %call4 = call ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr %nt) #7 call void @dummy(ptr %call4) - ;; Attribute cold on a nobuiltin call has no effect. 
- ; HOTCOLD: @_ZnamSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr nonnull %nt) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @_ZnamSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr nonnull %nt) + ; NOBUILTIN-ON: @_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t(i64 10, i64 8, ptr nonnull %nt, i8 [[COLD]]) %call3 = call ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 10, i64 8, ptr %nt) #6 call void @dummy(ptr %call3) ret void @@ -492,8 +504,9 @@ define void @size_returning_test() { %call4 = call {ptr, i64} @__size_returning_new(i64 10) #8 %p4 = extractvalue {ptr, i64} %call4, 0 call void @dummy(ptr %p4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @__size_returning_new(i64 10) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. + ; NOBUILTIN-OFF: @__size_returning_new(i64 10) + ; NOBUILTIN-ON: @__size_returning_new_hot_cold(i64 10, i8 [[COLD]]) %call3 = call {ptr, i64} @__size_returning_new(i64 10) #6 %p3 = extractvalue {ptr, i64} %call3, 0 call void @dummy(ptr %p3) @@ -524,8 +537,9 @@ define void @size_returning_aligned_test() { %call4 = call {ptr, i64} @__size_returning_new_aligned(i64 10, i64 8) #8 %p4 = extractvalue {ptr, i64} %call4, 0 call void @dummy(ptr %p4) - ;; Attribute cold on a nobuiltin call has no effect. - ; HOTCOLD: @__size_returning_new_aligned(i64 10, i64 8) + ;; Attribute cold on a nobuiltin call has no effect, unless optionally enabled. 
+ ; NOBUILTIN-OFF: @__size_returning_new_aligned(i64 10, i64 8) + ; NOBUILTIN-ON: @__size_returning_new_aligned_hot_cold(i64 10, i64 8, i8 [[COLD]]) %call3 = call {ptr, i64} @__size_returning_new_aligned(i64 10, i64 8) #6 %p3 = extractvalue {ptr, i64} %call3, 0 call void @dummy(ptr %p3) From c642e2aa61c430ae597b0bd08e924339292e30e9 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar <96587705+kr-2003@users.noreply.github.com> Date: Sat, 13 Sep 2025 12:31:31 +0530 Subject: [PATCH 212/734] [clang-repl] Add support for running custom code in Remote JIT executor (#157358) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a custom lambda mechanism that allows injecting user-defined code into the Remote JIT’s executor. --------- Co-authored-by: kr-2003 --- clang/include/clang/Interpreter/Interpreter.h | 4 +- clang/lib/Interpreter/IncrementalExecutor.cpp | 6 +- clang/lib/Interpreter/IncrementalExecutor.h | 3 +- clang/lib/Interpreter/Interpreter.cpp | 3 +- clang/unittests/Interpreter/CMakeLists.txt | 23 +- .../OutOfProcessInterpreterTests.cpp | 203 ++++++++++++++++++ 6 files changed, 237 insertions(+), 5 deletions(-) create mode 100644 clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index fcc270a17001e..078d70b3b1749 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -135,13 +135,15 @@ class Interpreter { std::string OrcRuntimePath = ""; /// PID of the out-of-process JIT executor. 
uint32_t ExecutorPID = 0; + /// Custom lambda to be executed inside child process/executor + std::function CustomizeFork = nullptr; /// An optional code model to provide to the JITTargetMachineBuilder std::optional CM = std::nullopt; JITConfig() : IsOutOfProcess(false), OOPExecutor(""), OOPExecutorConnect(""), UseSharedMemory(false), SlabAllocateSize(0), OrcRuntimePath(""), - ExecutorPID(0), CM(std::nullopt) {} + ExecutorPID(0), CustomizeFork(nullptr), CM(std::nullopt) {} }; protected: diff --git a/clang/lib/Interpreter/IncrementalExecutor.cpp b/clang/lib/Interpreter/IncrementalExecutor.cpp index b0eb7d0e9f072..45620fcd358c8 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.cpp +++ b/clang/lib/Interpreter/IncrementalExecutor.cpp @@ -172,7 +172,8 @@ createSharedMemoryManager(llvm::orc::SimpleRemoteEPC &SREPC, llvm::Expected, uint32_t>> IncrementalExecutor::launchExecutor(llvm::StringRef ExecutablePath, bool UseSharedMemory, - unsigned SlabAllocateSize) { + unsigned SlabAllocateSize, + std::function CustomizeFork) { #ifndef LLVM_ON_UNIX // FIXME: Add support for Windows. return llvm::make_error( @@ -215,6 +216,9 @@ IncrementalExecutor::launchExecutor(llvm::StringRef ExecutablePath, close(ToExecutor[WriteEnd]); close(FromExecutor[ReadEnd]); + if (CustomizeFork) + CustomizeFork(); + // Execute the child process. 
std::unique_ptr ExecutorPath, FDSpecifier; { diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h index d091535166770..bb1ec33452515 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.h +++ b/clang/lib/Interpreter/IncrementalExecutor.h @@ -79,7 +79,8 @@ class IncrementalExecutor { static llvm::Expected< std::pair, uint32_t>> launchExecutor(llvm::StringRef ExecutablePath, bool UseSharedMemory, - unsigned SlabAllocateSize); + unsigned SlabAllocateSize, + std::function CustomizeFork = nullptr); #if LLVM_ON_UNIX && LLVM_ENABLE_THREADS static llvm::Expected> diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 84f1c363b5f6f..07c170a63ce82 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -355,7 +355,8 @@ Interpreter::outOfProcessJITBuilder(JITConfig Config) { if (!Config.OOPExecutor.empty()) { // Launch an out-of-process executor locally in a child process. 
auto ResultOrErr = IncrementalExecutor::launchExecutor( - Config.OOPExecutor, Config.UseSharedMemory, Config.SlabAllocateSize); + Config.OOPExecutor, Config.UseSharedMemory, Config.SlabAllocateSize, + Config.CustomizeFork); if (!ResultOrErr) return ResultOrErr.takeError(); childPid = ResultOrErr->second; diff --git a/clang/unittests/Interpreter/CMakeLists.txt b/clang/unittests/Interpreter/CMakeLists.txt index db9f80d9f53fe..7b8dcfc9b0546 100644 --- a/clang/unittests/Interpreter/CMakeLists.txt +++ b/clang/unittests/Interpreter/CMakeLists.txt @@ -29,12 +29,25 @@ set(CLANG_LIBS_TO_LINK ) endif() -add_distinct_clang_unittest(ClangReplInterpreterTests +set(CLANG_REPL_TEST_SOURCES IncrementalCompilerBuilderTest.cpp IncrementalProcessingTest.cpp InterpreterTest.cpp InterpreterExtensionsTest.cpp CodeCompletionTest.cpp +) + +if(TARGET compiler-rt) + list(APPEND CLANG_REPL_TEST_SOURCES + OutOfProcessInterpreterTests.cpp + ) + message(STATUS "Compiler-RT found, enabling out of process JIT tests") +endif() + +add_distinct_clang_unittest(ClangReplInterpreterTests + ${CLANG_REPL_TEST_SOURCES} + + PARTIAL_SOURCES_INTENDED EXPORT_SYMBOLS @@ -48,6 +61,14 @@ add_distinct_clang_unittest(ClangReplInterpreterTests ${LLVM_COMPONENTS_TO_LINK} ) +if(TARGET compiler-rt) + add_dependencies(ClangReplInterpreterTests + llvm-jitlink-executor + compiler-rt + ) + message(STATUS "Adding dependency on compiler-rt for out of process JIT tests") +endif() + if(EMSCRIPTEN) # Without the above you try to link to LLVMSupport twice, and end # up with a duplicate symbol error when creating the main module diff --git a/clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp b/clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp new file mode 100644 index 0000000000000..704ddc37e642e --- /dev/null +++ b/clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp @@ -0,0 +1,203 @@ +//===- unittests/Interpreter/OutOfProcessInterpreterTest.cpp --- Interpreter +// tests when Out-of-Process 
----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Unit tests for Clang's Interpreter library. +// +//===----------------------------------------------------------------------===// + +#include "InterpreterTestFixture.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclGroup.h" +#include "clang/AST/Mangle.h" +#include "clang/Basic/Version.h" +#include "clang/Config/config.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Interpreter/Interpreter.h" +#include "clang/Interpreter/Value.h" +#include "clang/Sema/Lookup.h" +#include "clang/Sema/Sema.h" +#include "llvm/Support/Error.h" +#include "llvm/TargetParser/Host.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include +#include +#include + +using namespace clang; + +llvm::ExitOnError ExitOnError; + +namespace { + +using Args = std::vector; + +struct FileDeleter { + void operator()(FILE *f) { + if (f) + fclose(f); + } +}; + +struct IOContext { + std::unique_ptr stdin_file; + std::unique_ptr stdout_file; + std::unique_ptr stderr_file; + + bool initializeTempFiles() { + stdin_file.reset(tmpfile()); + stdout_file.reset(tmpfile()); + stderr_file.reset(tmpfile()); + return stdin_file && stdout_file && stderr_file; + } + + std::string readStdoutContent() { + if (!stdout_file) + return ""; + rewind(stdout_file.get()); + std::ostringstream content; + char buffer[1024]; + size_t bytes_read; + while ((bytes_read = fread(buffer, 1, sizeof(buffer), stdout_file.get())) > + 0) { + content.write(buffer, bytes_read); + } + return content.str(); + } + + std::string readStderrContent() { + if (!stderr_file) + return ""; + rewind(stderr_file.get()); + std::ostringstream content; + char 
buffer[1024]; + size_t bytes_read; + while ((bytes_read = fread(buffer, 1, sizeof(buffer), stderr_file.get())) > + 0) { + content.write(buffer, bytes_read); + } + return content.str(); + } +}; + +static void removePathComponent(unsigned N, llvm::SmallString<256> &Path) { + for (unsigned i = 0; i < N; ++i) + llvm::sys::path::remove_filename(Path); +} + +static std::string getExecutorPath() { + llvm::SmallString<256> ExecutorPath(llvm::sys::fs::getMainExecutable( + nullptr, reinterpret_cast(&getExecutorPath))); + removePathComponent(5, ExecutorPath); + llvm::sys::path::append(ExecutorPath, "bin", "llvm-jitlink-executor"); + return ExecutorPath.str().str(); +} + +static std::string getOrcRuntimePath() { + llvm::SmallString<256> RuntimePath(llvm::sys::fs::getMainExecutable( + nullptr, reinterpret_cast(&getOrcRuntimePath))); + removePathComponent(5, RuntimePath); + llvm::sys::path::append(RuntimePath, CLANG_INSTALL_LIBDIR_BASENAME, "clang", + CLANG_VERSION_MAJOR_STRING, "lib"); + + llvm::Triple SystemTriple(llvm::sys::getProcessTriple()); + if (SystemTriple.isOSBinFormatMachO()) { + llvm::sys::path::append(RuntimePath, "darwin", "liborc_rt_osx.a"); + } else if (SystemTriple.isOSBinFormatELF()) { + llvm::sys::path::append(RuntimePath, "x86_64-unknown-linux-gnu", + "liborc_rt.a"); + } + return RuntimePath.str().str(); +} + +static std::unique_ptr +createInterpreterWithRemoteExecution(std::shared_ptr io_ctx, + const Args &ExtraArgs = {}) { + Args ClangArgs = {"-Xclang", "-emit-llvm-only"}; + llvm::append_range(ClangArgs, ExtraArgs); + auto CB = clang::IncrementalCompilerBuilder(); + CB.SetCompilerArgs(ClangArgs); + auto CI = cantFail(CB.CreateCpp()); + + clang::Interpreter::JITConfig Config; + llvm::Triple SystemTriple(llvm::sys::getProcessTriple()); + + if (SystemTriple.isOSBinFormatELF() || SystemTriple.isOSBinFormatMachO()) { + Config.IsOutOfProcess = true; + Config.OOPExecutor = getExecutorPath(); + Config.UseSharedMemory = false; + Config.SlabAllocateSize = 0; + 
Config.OrcRuntimePath = getOrcRuntimePath(); + + int stdin_fd = fileno(io_ctx->stdin_file.get()); + int stdout_fd = fileno(io_ctx->stdout_file.get()); + int stderr_fd = fileno(io_ctx->stderr_file.get()); + + Config.CustomizeFork = [=] { + auto redirect = [](int from, int to) { + if (from != to) { + dup2(from, to); + close(from); + } + }; + + redirect(stdin_fd, STDIN_FILENO); + redirect(stdout_fd, STDOUT_FILENO); + redirect(stderr_fd, STDERR_FILENO); + + setvbuf(stdout, nullptr, _IONBF, 0); + setvbuf(stderr, nullptr, _IONBF, 0); + + printf("CustomizeFork executed\n"); + fflush(stdout); + }; + } + + return cantFail(clang::Interpreter::create(std::move(CI), Config)); +} + +static size_t DeclsSize(TranslationUnitDecl *PTUDecl) { + return std::distance(PTUDecl->decls().begin(), PTUDecl->decls().end()); +} + +TEST_F(InterpreterTestBase, SanityWithRemoteExecution) { + if (!HostSupportsJIT()) + GTEST_SKIP(); + + std::string OrcRuntimePath = getOrcRuntimePath(); + std::string ExecutorPath = getExecutorPath(); + + if (!llvm::sys::fs::exists(OrcRuntimePath) || + !llvm::sys::fs::exists(ExecutorPath)) + GTEST_SKIP(); + + auto io_ctx = std::make_shared(); + ASSERT_TRUE(io_ctx->initializeTempFiles()); + + std::unique_ptr Interp = + createInterpreterWithRemoteExecution(io_ctx); + ASSERT_TRUE(Interp); + + using PTU = PartialTranslationUnit; + PTU &R1(cantFail(Interp->Parse("void g(); void g() {}"))); + EXPECT_EQ(2U, DeclsSize(R1.TUPart)); + + PTU &R2(cantFail(Interp->Parse("int i = 42;"))); + EXPECT_EQ(1U, DeclsSize(R2.TUPart)); + + std::string captured_stdout = io_ctx->readStdoutContent(); + std::string captured_stderr = io_ctx->readStderrContent(); + + EXPECT_TRUE(captured_stdout.find("CustomizeFork executed") != + std::string::npos); +} + +} // end anonymous namespace \ No newline at end of file From 1dc4db8f1ec535adc663d781f9f3a39f78d78256 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 13 Sep 2025 16:34:02 +0900 Subject: [PATCH 213/734] AMDGPU: Relax verifier for 
agpr/vgpr loads and stores (#158391) --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0361868e2c1e8..70223da961e92 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5590,7 +5590,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, Data = nullptr; if (ST.hasGFX90AInsts()) { - if (Dst && Data && + if (Dst && Data && !Dst->isTied() && !Data->isTied() && (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) { ErrInfo = "Invalid register class: " "vdata and vdst should be both VGPR or AGPR"; From 2a3c9f917d4112d7d96be0f2efa9e97f0b4bb842 Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Sat, 13 Sep 2025 10:38:56 +0200 Subject: [PATCH 214/734] [CIR] Upstream VisitOpaqueValueExpr support for Complex & Scalar (#157331) This change adds support for the OpaqueValueExpr for Complex & Scalar Issue: https://github.com/llvm/llvm-project/issues/141365 --- clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 24 +++ clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp | 22 ++- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 9 ++ clang/lib/CIR/CodeGen/CIRGenFunction.h | 8 + clang/test/CIR/CodeGen/opaque.cpp | 156 ++++++++++++++++++++ 5 files changed, 215 insertions(+), 4 deletions(-) create mode 100644 clang/test/CIR/CodeGen/opaque.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index aab7e2745f30f..4f2bafd986292 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -1376,6 +1376,30 @@ LValue CIRGenFunction::emitMaterializeTemporaryExpr( return makeAddrLValue(object, m->getType(), AlignmentSource::Decl); } +LValue +CIRGenFunction::getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e) { + assert(OpaqueValueMapping::shouldBindAsLValue(e)); + + auto it = opaqueLValues.find(e); + if (it != 
opaqueLValues.end()) + return it->second; + + assert(e->isUnique() && "LValue for a nonunique OVE hasn't been emitted"); + return emitLValue(e->getSourceExpr()); +} + +RValue +CIRGenFunction::getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e) { + assert(!OpaqueValueMapping::shouldBindAsLValue(e)); + + auto it = opaqueRValues.find(e); + if (it != opaqueRValues.end()) + return it->second; + + assert(e->isUnique() && "RValue for a nonunique OVE hasn't been emitted"); + return emitAnyExpr(e->getSourceExpr()); +} + LValue CIRGenFunction::emitCompoundLiteralLValue(const CompoundLiteralExpr *e) { if (e->isFileScope()) { cgm.errorNYI(e->getSourceRange(), "emitCompoundLiteralLValue: FileScope"); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index d678ea0212aa5..614c915a3a93d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -128,9 +128,12 @@ class ComplexExprEmitter : public StmtVisitor { return emitLoadOfLValue(me); } mlir::Value VisitOpaqueValueExpr(OpaqueValueExpr *e) { - cgf.cgm.errorNYI(e->getExprLoc(), - "ComplexExprEmitter VisitOpaqueValueExpr"); - return {}; + if (e->isGLValue()) + return emitLoadOfLValue(cgf.getOrCreateOpaqueLValueMapping(e), + e->getExprLoc()); + + // Otherwise, assume the mapping is the scalar directly. + return cgf.getOrCreateOpaqueRValueMapping(e).getValue(); } mlir::Value VisitPseudoObjectExpr(PseudoObjectExpr *e) { @@ -960,21 +963,32 @@ mlir::Value ComplexExprEmitter::VisitBinComma(const BinaryOperator *e) { mlir::Value ComplexExprEmitter::VisitAbstractConditionalOperator( const AbstractConditionalOperator *e) { - mlir::Value condValue = Visit(e->getCond()); mlir::Location loc = cgf.getLoc(e->getSourceRange()); + // Bind the common expression if necessary. 
+ CIRGenFunction::OpaqueValueMapping binding(cgf, e); + + CIRGenFunction::ConditionalEvaluation eval(cgf); + + Expr *cond = e->getCond()->IgnoreParens(); + mlir::Value condValue = cgf.evaluateExprAsBool(cond); + return builder .create( loc, condValue, /*thenBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { + eval.beginEvaluation(); mlir::Value trueValue = Visit(e->getTrueExpr()); b.create(loc, trueValue); + eval.endEvaluation(); }, /*elseBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { + eval.beginEvaluation(); mlir::Value falseValue = Visit(e->getFalseExpr()); b.create(loc, falseValue); + eval.endEvaluation(); }) .getResult(); } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 754ef79392916..2261e24fe44c2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -193,6 +193,15 @@ class ScalarExprEmitter : public StmtVisitor { return emitNullValue(e->getType(), cgf.getLoc(e->getSourceRange())); } + mlir::Value VisitOpaqueValueExpr(OpaqueValueExpr *e) { + if (e->isGLValue()) + return emitLoadOfLValue(cgf.getOrCreateOpaqueLValueMapping(e), + e->getExprLoc()); + + // Otherwise, assume the mapping is the scalar directly. + return cgf.getOrCreateOpaqueRValueMapping(e).getValue(); + } + mlir::Value VisitCastExpr(CastExpr *e); mlir::Value VisitCallExpr(const CallExpr *e); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 42f7f401555ca..30f06dffc0769 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -706,6 +706,14 @@ class CIRGenFunction : public CIRGenTypeCache { Address getAddrOfBitFieldStorage(LValue base, const clang::FieldDecl *field, mlir::Type fieldType, unsigned index); + /// Given an opaque value expression, return its LValue mapping if it exists, + /// otherwise create one. 
+ LValue getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e); + + /// Given an opaque value expression, return its RValue mapping if it exists, + /// otherwise create one. + RValue getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e); + /// Load the value for 'this'. This function is only valid while generating /// code for an C++ member function. /// FIXME(cir): this should return a mlir::Value! diff --git a/clang/test/CIR/CodeGen/opaque.cpp b/clang/test/CIR/CodeGen/opaque.cpp new file mode 100644 index 0000000000000..a48c013e5c20b --- /dev/null +++ b/clang/test/CIR/CodeGen/opaque.cpp @@ -0,0 +1,156 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +void foo() { + int a; + int b = 1 ?: a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] +// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store{{.*}} %[[CONST_1]], %[[B_ADDR]] : !s32i, !cir.ptr + +// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[B_ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: store i32 1, ptr %[[B_ADDR]], align 4 + +// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 +// OGCG: %[[B_ADDR:.*]] = alloca i32, align 4 +// OGCG: store i32 1, ptr %[[B_ADDR]], align 4 + +void foo2() { + float _Complex a; + float _Complex b; + float _Complex c = a ?: b; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["b"] +// CIR: %[[C_ADDR:.*]] = 
cir.alloca !cir.complex, !cir.ptr>, ["c", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr>, !cir.complex +// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex -> !cir.float +// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex -> !cir.float +// CIR: %[[A_REAL_BOOL:.*]] = cir.cast(float_to_bool, %[[A_REAL]] : !cir.float), !cir.bool +// CIR: %[[A_IMAG_BOOL:.*]] = cir.cast(float_to_bool, %[[A_IMAG]] : !cir.float), !cir.bool +// CIR: %[[CONST_TRUE:.*]] = cir.const #true +// CIR: %[[COND:.*]] = cir.select if %[[A_REAL_BOOL]] then %[[CONST_TRUE]] else %[[A_IMAG_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool +// CIR: %[[RESULT:.*]] = cir.ternary(%[[COND]], true { +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.yield %[[TMP_A]] : !cir.complex +// CIR: }, false { +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.yield %[[TMP_B]] : !cir.complex +// CIR: }) : (!cir.bool) -> !cir.complex +// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex, !cir.ptr> + +// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4 +// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 +// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 +// LLVM: %[[A_REAL_BOOL:.*]] = fcmp une float %[[A_REAL]], 0.000000e+00 +// LLVM: %[[A_IMAG_BOOL:.*]] = fcmp une float %[[A_IMAG]], 0.000000e+00 +// LLVM: %[[COND:.*]] = or i1 %[[A_REAL_BOOL]], %[[A_IMAG_BOOL]] +// LLVM: br i1 %[[COND]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// LLVM: [[COND_TRUE]]: +// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4 +// LLVM: br label %[[COND_RESULT:.*]] +// LLVM: [[COND_FALSE]]: +// LLVM: 
%[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4 +// LLVM: br label %[[COND_RESULT]] +// LLVM: [[COND_RESULT]]: +// LLVM: %[[RESULT:.*]] = phi { float, float } [ %[[TMP_B]], %[[COND_FALSE]] ], [ %[[TMP_A]], %[[COND_TRUE]] ] +// LLVM: br label %[[COND_END:.*]] +// LLVM: [[COND_END]]: +// LLVM: store { float, float } %[[RESULT]], ptr %[[C_ADDR]], align 4 + +// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: %[[C_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4 +// OGCG: %[[A_REAL_BOOL:.*]] = fcmp une float %[[A_REAL]], 0.000000e+00 +// OGCG: %[[A_IMAG_BOOL:.*]] = fcmp une float %[[A_IMAG]], 0.000000e+00 +// OGCG: %[[COND:.*]] = or i1 %[[A_REAL_BOOL]], %[[A_IMAG_BOOL]] +// OGCG: br i1 %tobool2, label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// OGCG: [[COND_TRUE]]: +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4 +// OGCG: br label %[[COND_END:.*]] +// OGCG: [[COND_FALSE]]: +// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4 +// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4 +// OGCG: br label 
%[[COND_END]] +// OGCG: [[COND_END]]: +// OGCG: %[[RESULT_REAL:.*]] = phi float [ %[[A_REAL]], %[[COND_TRUE]] ], [ %[[B_REAL]], %[[COND_FALSE]] ] +// OGCG: %[[RESULT_IMAG:.*]] = phi float [ %[[A_IMAG]], %[[COND_TRUE]] ], [ %[[B_IMAG]], %[[COND_FALSE]] ] +// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG: store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG: store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 + +void foo3() { + int a; + int b; + int c = a ?: b; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["b"] +// CIR: %[[C_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["c", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr, !s32i +// CIR: %[[A_BOOL:.*]] = cir.cast(int_to_bool, %[[TMP_A]] : !s32i), !cir.bool +// CIR: %[[RESULT:.*]] = cir.ternary(%[[A_BOOL]], true { +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr, !s32i +// CIR: cir.yield %[[TMP_A]] : !s32i +// CIR: }, false { +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr, !s32i +// CIR: cir.yield %[[TMP_B]] : !s32i +// CIR: }) : (!cir.bool) -> !s32i +// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !s32i, !cir.ptr + +// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[B_ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[C_ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4 +// LLVM: %[[COND:.*]] = icmp ne i32 %[[TMP_A]], 0 +// LLVM: br i1 %[[COND]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// LLVM: [[COND_TRUE]]: +// LLVM: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4 +// LLVM: br label %[[COND_RESULT:.*]] +// LLVM: [[COND_FALSE]]: +// LLVM: %[[TMP_B:.*]] = load i32, ptr %[[B_ADDR]], align 4 +// LLVM: br label %[[COND_RESULT]] +// 
LLVM: [[COND_RESULT]]: +// LLVM: %[[RESULT:.*]] = phi i32 [ %[[TMP_B]], %[[COND_FALSE]] ], [ %[[TMP_A]], %[[COND_TRUE]] ] +// LLVM: br label %[[COND_END:.*]] +// LLVM: [[COND_END]]: +// LLVM: store i32 %[[RESULT]], ptr %[[C_ADDR]], align 4 + +// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 +// OGCG: %[[B_ADDR:.*]] = alloca i32, align 4 +// OGCG: %[[C_ADDR:.*]] = alloca i32, align 4 +// OGCG: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4 +// OGCG: %[[A_BOOL:.*]] = icmp ne i32 %[[TMP_A]], 0 +// OGCG: br i1 %[[A_BOOL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// OGCG: [[COND_TRUE]]: +// OGCG: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4 +// OGCG: br label %[[COND_END:.*]] +// OGCG: [[COND_FALSE]]: +// OGCG: %[[TMP_B:.*]] = load i32, ptr %[[B_ADDR]], align 4 +// OGCG: br label %[[COND_END]] +// OGCG: [[COND_END]]: +// OGCG: %[[RESULT:.*]] = phi i32 [ %[[TMP_A]], %[[COND_TRUE]] ], [ %[[TMP_B]], %[[COND_FALSE]] ] +// OGCG: store i32 %[[RESULT]], ptr %[[C_ADDR]], align 4 From cf9576d940ccd4fbe7d3fe0eef5cfdc9f693eade Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Sat, 13 Sep 2025 11:02:30 +0200 Subject: [PATCH 215/734] [CIR] Upstream FPToFPBuiltin CosOp (#158342) Upstream support for FPToFPBuiltin CosOp --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 10 +++++++ clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 13 +++++++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 9 +++++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 9 +++++++ clang/test/CIR/CodeGen/builtins-elementwise.c | 27 +++++++++++++++++++ .../CIR/CodeGen/builtins-floating-point.c | 20 ++++++++++++++ 6 files changed, 88 insertions(+) create mode 100644 clang/test/CIR/CodeGen/builtins-floating-point.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index b3c435cc59140..38c4a87f69d6d 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -3847,6 +3847,16 @@ def CIR_ATanOp : 
CIR_UnaryFPToFPBuiltinOp<"atan", "ATanOp"> { }]; } +def CIR_CosOp : CIR_UnaryFPToFPBuiltinOp<"cos", "CosOp"> { + let summary = "Computes the floating-point cosine value"; + let description = [{ + `cir.cos` computes the cosine of a floating-point operand and returns + a result of the same type. + + Floating-point exceptions are ignored, and it does not set `errno`. + }]; +} + def CIR_FAbsOp : CIR_UnaryFPToFPBuiltinOp<"fabs", "FAbsOp"> { let summary = "Computes the floating-point absolute value"; let description = [{ diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 8892e62accb74..cf17de144f4d9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -200,6 +200,17 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, builder.createBitcast(allocaAddr, builder.getVoidPtrTy())); } + case Builtin::BIcos: + case Builtin::BIcosf: + case Builtin::BIcosl: + case Builtin::BI__builtin_cos: + case Builtin::BI__builtin_cosf: + case Builtin::BI__builtin_cosf16: + case Builtin::BI__builtin_cosl: + case Builtin::BI__builtin_cosf128: + assert(!cir::MissingFeatures::fastMathFlags()); + return emitUnaryMaybeConstrainedFPBuiltin(*this, *e); + case Builtin::BIfabs: case Builtin::BIfabsf: case Builtin::BIfabsl: @@ -415,6 +426,8 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitUnaryFPBuiltin(*this, *e); case Builtin::BI__builtin_elementwise_atan: return emitUnaryFPBuiltin(*this, *e); + case Builtin::BI__builtin_elementwise_cos: + return emitUnaryFPBuiltin(*this, *e); } // If this is an alias for a lib function (e.g. 
__builtin_sin), emit diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index d9097b0b9e03d..1d7e3df1430ac 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -185,6 +185,14 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMCosOpLowering::matchAndRewrite( + cir::CosOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type resTy = typeConverter->convertType(op.getType()); + rewriter.replaceOpWithNewOp(op, resTy, adaptor.getSrc()); + return mlir::success(); +} + static mlir::Value getLLVMIntCast(mlir::ConversionPatternRewriter &rewriter, mlir::Value llvmSrc, mlir::Type llvmDstIntTy, bool isUnsigned, uint64_t cirSrcWidth, @@ -2498,6 +2506,7 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMComplexRealPtrOpLowering, CIRToLLVMComplexSubOpLowering, CIRToLLVMCopyOpLowering, + CIRToLLVMCosOpLowering, CIRToLLVMConstantOpLowering, CIRToLLVMExpectOpLowering, CIRToLLVMFAbsOpLowering, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index dd1dd0aaec7d8..09ff7a0901c69 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -189,6 +189,15 @@ class CIRToLLVMCopyOpLowering : public mlir::OpConversionPattern { mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMCosOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::CosOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + class CIRToLLVMExpectOpLowering : public mlir::OpConversionPattern { public: diff --git a/clang/test/CIR/CodeGen/builtins-elementwise.c b/clang/test/CIR/CodeGen/builtins-elementwise.c index 
e3460f06d166a..f64080b829bdf 100644 --- a/clang/test/CIR/CodeGen/builtins-elementwise.c +++ b/clang/test/CIR/CodeGen/builtins-elementwise.c @@ -89,3 +89,30 @@ void test_builtin_elementwise_atan(float f, double d, vfloat4 vf4, // OGCG: %{{.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> %{{.*}}) vd4 = __builtin_elementwise_atan(vd4); } + +void test_builtin_elementwise_cos(float f, double d, vfloat4 vf4, + vdouble4 vd4) { + // CIR-LABEL: test_builtin_elementwise_cos + // LLVM-LABEL: test_builtin_elementwise_cos + // OGCG-LABEL: test_builtin_elementwise_cos + + // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.float + // LLVM: {{%.*}} = call float @llvm.cos.f32(float {{%.*}}) + // OGCG: {{%.*}} = call float @llvm.cos.f32(float {{%.*}}) + f = __builtin_elementwise_cos(f); + + // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.double + // LLVM: {{%.*}} = call double @llvm.cos.f64(double {{%.*}}) + // OGCG: {{%.*}} = call double @llvm.cos.f64(double {{%.*}}) + d = __builtin_elementwise_cos(d); + + // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<4 x !cir.float> + // LLVM: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}}) + // OGCG: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}}) + vf4 = __builtin_elementwise_cos(vf4); + + // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<4 x !cir.double> + // LLVM: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}}) + // OGCG: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}}) + vd4 = __builtin_elementwise_cos(vd4); +} diff --git a/clang/test/CIR/CodeGen/builtins-floating-point.c b/clang/test/CIR/CodeGen/builtins-floating-point.c new file mode 100644 index 0000000000000..193cc172d37d2 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtins-floating-point.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 
-Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -Wno-unused-value -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +float cosf(float f) { + return __builtin_cosf(f); + // CHECK: %{{.*}} = cir.cos {{.*}} : !cir.float + // LLVM: %{{.*}} = call float @llvm.cos.f32(float %{{.*}}) + // OGCG: %{{.*}} = call float @llvm.cos.f32(float %{{.*}}) +} + +double cos(double f) { + return __builtin_cos(f); + // CIR: {{.+}} = cir.cos {{.+}} : !cir.double + // LLVM: %{{.*}} = call double @llvm.cos.f64(double %{{.*}}) + // OGCG: %{{.*}} = call double @llvm.cos.f64(double %{{.*}}) +} From b31f8cb1c9108f6b45c8929df27b61396a4ccd45 Mon Sep 17 00:00:00 2001 From: nerix Date: Sat, 13 Sep 2025 11:15:52 +0200 Subject: [PATCH 216/734] [LLDB] Require DIA SDK for testing the PDB plugin-selection setting (#158284) If LLDB is built without the DIA SDK enabled, then the native plugin is used regardless of `plugin.symbol-file.pdb.reader` or `LLDB_USE_NATIVE_PDB_READER`. This made the test fail on Windows when the DIA SDK was disabled (https://github.com/llvm/llvm-project/issues/114906#issuecomment-3241796062). This PR changes the requirement for the test from `target-windows` to `diasdk` (only used in this test). 
--- lldb/test/CMakeLists.txt | 1 + lldb/test/Shell/SymbolFile/PDB/native-setting.cpp | 2 +- lldb/test/Shell/lit.cfg.py | 3 +++ lldb/test/Shell/lit.site.cfg.py.in | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 39462560c4b98..8116f4c3c823a 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -250,6 +250,7 @@ llvm_canonicalize_cmake_booleans( LLDB_ENABLE_LZMA LLVM_ENABLE_ZLIB LLVM_ENABLE_SHARED_LIBS + LLVM_ENABLE_DIA_SDK LLDB_HAS_LIBCXX LLDB_TEST_SHELL_DISABLE_REMOTE LLDB_TOOL_LLDB_SERVER_BUILD diff --git a/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp b/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp index ce188e75553c7..edf7508b88f17 100644 --- a/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp +++ b/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp @@ -1,4 +1,4 @@ -// REQUIRES: target-windows +// REQUIRES: diasdk // Test plugin.symbol-file.pdb.reader setting // RUN: %build -o %t.exe -- %s diff --git a/lldb/test/Shell/lit.cfg.py b/lldb/test/Shell/lit.cfg.py index 46e2117cdb8e7..505847fb763e0 100644 --- a/lldb/test/Shell/lit.cfg.py +++ b/lldb/test/Shell/lit.cfg.py @@ -170,6 +170,9 @@ def calculate_arch_features(arch_string): ) ) +if config.have_dia_sdk: + config.available_features.add("diasdk") + # NetBSD permits setting dbregs either if one is root # or if user_set_dbregs is enabled can_set_dbregs = True diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index beaa41e6fd379..47beac002a19c 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -34,6 +34,7 @@ config.have_lldb_server = @LLDB_TOOL_LLDB_SERVER_BUILD@ config.lldb_system_debugserver = @LLDB_USE_SYSTEM_DEBUGSERVER@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.lldb_has_lldbrpc = @LLDB_BUILD_LLDBRPC@ +config.have_dia_sdk = @LLVM_ENABLE_DIA_SDK@ # The shell tests use their own module caches. 
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell") From 9ac292441fcc4ebbaf4c7c36a9b2e2dd32580be0 Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Sat, 13 Sep 2025 18:03:13 +0800 Subject: [PATCH 217/734] [Clang] Fix the source location of default template arguments in placeholder constraints (#158414) We discovered this issue while working on the concept normalization refactoring. We missed the source location when diagnosing the instantiation point of the placeholder constraints, which is involved by the substitution of default template arguments that happens before constraint evaluation. See the issue alive: https://godbolt.org/z/cWr9qP3E8 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/lib/Sema/SemaTemplateDeduction.cpp | 2 +- clang/test/SemaTemplate/concepts.cpp | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 51e5973098c14..6eb2a52e80ba9 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -285,6 +285,9 @@ Improvements to Clang's diagnostics - Clang now looks through parenthesis for ``-Wundefined-reinterpret-cast`` diagnostic. +- Fixed a bug where the source location was missing when diagnosing ill-formed + placeholder constraints. 
+ Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 64be2aab259f5..62e867c44ad14 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -5180,7 +5180,7 @@ static bool CheckDeducedPlaceholderConstraints(Sema &S, const AutoType &Type, TemplateArgs.addArgument(TypeLoc.getArgLoc(I)); Sema::CheckTemplateArgumentInfo CTAI; - if (S.CheckTemplateArgumentList(Concept, SourceLocation(), TemplateArgs, + if (S.CheckTemplateArgumentList(Concept, TypeLoc.getNameLoc(), TemplateArgs, /*DefaultArgs=*/{}, /*PartialTemplateArgs=*/false, CTAI)) return true; diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp index d63ad01b35800..209e7dc69797d 100644 --- a/clang/test/SemaTemplate/concepts.cpp +++ b/clang/test/SemaTemplate/concepts.cpp @@ -1251,6 +1251,27 @@ int i = SVGPropertyOwnerRegistry::fastAnimatedPropertyLookup() } +namespace GH61824 { + +template // #T_Type +concept C = true; + +constexpr bool f(C auto) { // #GH61824_f + return true; +} + +C auto x = 0; +// expected-error@#T_Type {{type 'int' cannot be used prior to '::'}} \ +// expected-note@-1 {{in instantiation of default argument}} + +// This will be fixed when we merge https://github.com/llvm/llvm-project/pull/141776 +// Which makes us behave like GCC. 
+static_assert(f(0)); +// expected-error@-1 {{no matching function for call}} \ +// expected-note@#GH61824_f {{constraints not satisfied}} \ +// expected-note@#T_Type {{type 'int' cannot be used prior to '::'}} + +} namespace GH149986 { template concept PerfectSquare = [](){} // expected-note 2{{here}} From a4993a27fb005c2c65e065e9d7703533f4d26bd2 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Sat, 13 Sep 2025 06:03:46 -0400 Subject: [PATCH 218/734] [AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions (#157563) We do that with the other flag setting nodes. We should do this with all flag setting and non-flag setting nodes. --- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +++ llvm/test/CodeGen/AArch64/abds-neg.ll | 25 +++++++++---------- llvm/test/CodeGen/AArch64/abds.ll | 3 +-- llvm/test/CodeGen/AArch64/abdu-neg.ll | 25 +++++++++---------- llvm/test/CodeGen/AArch64/abdu.ll | 3 +-- llvm/test/CodeGen/AArch64/adds_cmn.ll | 6 ++--- 6 files changed, 32 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c9a756da0078d..d7c90bcb9723d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27572,6 +27572,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false)) return R; return performFlagSettingCombine(N, DCI, AArch64ISD::SBC); + case AArch64ISD::ADDS: + return performFlagSettingCombine(N, DCI, ISD::ADD); + case AArch64ISD::SUBS: + return performFlagSettingCombine(N, DCI, ISD::SUB); case AArch64ISD::BICi: { APInt DemandedBits = APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits()); diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 02c76ba7343a0..37319642f5b34 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -73,8 +73,8 @@ define 
i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w1, w8 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w8, w1 +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: subs w8, w8, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1, sxth +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, gt ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: 
abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, gt ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index bf52e71ec21fe..30ac22cfb6b1f 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -112,8 +112,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: subs w8, w0, w8 +; CHECK-NEXT: subs w8, w0, w1, sxth ; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %aext = sext i32 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 400031b64cb84..79fc12ea76f63 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w1, w8 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: subs w8, w8, w1 +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: subs w8, w8, w0 -; CHECK-NEXT: cneg 
w0, w8, hs +; CHECK-NEXT: subs w8, w0, w1, uxth +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, hs +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, hi ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, hs +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, hi ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 8d2b0b0742d7d..af4ce92b16342 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -112,8 +112,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: subs w8, w0, w8 +; CHECK-NEXT: subs w8, w0, w1, uxth ; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %aext = zext i32 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/adds_cmn.ll b/llvm/test/CodeGen/AArch64/adds_cmn.ll index 7f1cb0df049b1..aa070b7886ba5 100644 --- 
a/llvm/test/CodeGen/AArch64/adds_cmn.ll +++ b/llvm/test/CodeGen/AArch64/adds_cmn.ll @@ -4,10 +4,8 @@ define { i32, i32 } @adds_cmn(i32 noundef %x, i32 noundef %y) { ; CHECK-LABEL: adds_cmn: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmn w0, w1 -; CHECK-NEXT: add w1, w0, w1 -; CHECK-NEXT: cset w8, lo -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: adds w1, w0, w1 +; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret entry: %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) From 86397f55d5b3ac2ebefc91bbf1a7a6a23b44a3e2 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 13 Sep 2025 13:51:14 +0300 Subject: [PATCH 219/734] [M68k] Add missing dependency on TargetParser Became necessary after f3efbce4. --- llvm/lib/Target/M68k/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/M68k/CMakeLists.txt b/llvm/lib/Target/M68k/CMakeLists.txt index b730f41b22353..1ac7e211a996c 100644 --- a/llvm/lib/Target/M68k/CMakeLists.txt +++ b/llvm/lib/Target/M68k/CMakeLists.txt @@ -51,6 +51,7 @@ add_llvm_target(M68kCodeGen SelectionDAG Support Target + TargetParser ADD_TO_COMPONENT M68k From ee2a225a25fbc41fc7a47e089f09022f90eeaac3 Mon Sep 17 00:00:00 2001 From: Ryan Kim Date: Sat, 13 Sep 2025 21:04:55 +0900 Subject: [PATCH 220/734] [mlir] Fix correct memset range in `OwningMemRef` zero-init (#158200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `OwningMemref` allocates with overprovision + manual alignment. This is fixing the zero-initialization of the data, the existing code was potentially overrunning the allocation: ```cpp memset(descriptor.data, 0, size + desiredAlignment); // ❌ may overrun ``` This is invalid because `descriptor.data` (the aligned pointer) **does not point to the full allocated block** (`size + desiredAlignment`). Zeroing that much from the aligned start can write past the end of the allocation. 
Instead we only initialize the data from the aligned pointer for the expected buffer size. The padding from [allocatedPtr, alignedDataPtr] is left untouched. --- .../include/mlir/ExecutionEngine/MemRefUtils.h | 10 ++++------ mlir/unittests/ExecutionEngine/Invoke.cpp | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/MemRefUtils.h b/mlir/include/mlir/ExecutionEngine/MemRefUtils.h index d66d757cb7a8e..e9471731afe13 100644 --- a/mlir/include/mlir/ExecutionEngine/MemRefUtils.h +++ b/mlir/include/mlir/ExecutionEngine/MemRefUtils.h @@ -164,19 +164,17 @@ class OwningMemRef { int64_t nElements = 1; for (int64_t s : shapeAlloc) nElements *= s; - auto [data, alignedData] = + auto [allocatedPtr, alignedData] = detail::allocAligned(nElements, allocFun, alignment); - descriptor = detail::makeStridedMemRefDescriptor(data, alignedData, - shape, shapeAlloc); + descriptor = detail::makeStridedMemRefDescriptor( + allocatedPtr, alignedData, shape, shapeAlloc); if (init) { for (StridedMemrefIterator it = descriptor.begin(), end = descriptor.end(); it != end; ++it) init(*it, it.getIndices()); } else { - memset(descriptor.data, 0, - nElements * sizeof(T) + - alignment.value_or(detail::nextPowerOf2(sizeof(T)))); + memset(alignedData, 0, nElements * sizeof(T)); } } /// Take ownership of an existing descriptor with a custom deleter. diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp index cdeeca20610f0..3161c7053f7a4 100644 --- a/mlir/unittests/ExecutionEngine/Invoke.cpp +++ b/mlir/unittests/ExecutionEngine/Invoke.cpp @@ -251,6 +251,24 @@ TEST(NativeMemRefJit, SKIP_WITHOUT_JIT(BasicMemref)) { EXPECT_EQ((a[{2, 1}]), 42.); } +TEST(NativeMemRefJit, SKIP_WITHOUT_JIT(OwningMemrefZeroInit)) { + constexpr int k = 3; + constexpr int m = 7; + int64_t shape[] = {k, m}; + // Use a large alignment to stress the case where the memref data/basePtr are + // disjoint. 
+ int alignment = 8192; + OwningMemRef a(shape, {}, {}, alignment); + ASSERT_EQ( + (void *)(((uintptr_t)a->basePtr + alignment - 1) & ~(alignment - 1)), + a->data); + for (int i = 0; i < k; ++i) { + for (int j = 0; j < m; ++j) { + EXPECT_EQ((a[{i, j}]), 0.); + } + } +} + // A helper function that will be called from the JIT static void memrefMultiply(::StridedMemRefType *memref, int32_t coefficient) { From de04d422dd936a5c722b26fcaf4c9a46c2c23b14 Mon Sep 17 00:00:00 2001 From: 0xdeadbeaf <0xdeadbeaf@tutamail.com> Date: Sat, 13 Sep 2025 12:15:04 +0000 Subject: [PATCH 221/734] [clang-tidy] Fixed typo for bugprone-easily-swappable-parameters clang-tidy check (#158282) I came across this little typo mistake while reading the docs, so I've fixed it :) --- .../bugprone/EasilySwappableParametersCheck.cpp | 12 ++++++------ .../bugprone/EasilySwappableParametersCheck.h | 2 +- clang-tools-extra/docs/ReleaseNotes.rst | 11 ++++++++--- .../checks/bugprone/easily-swappable-parameters.rst | 2 +- .../bugprone/easily-swappable-parameters-ignore.cpp | 2 +- ...sily-swappable-parameters-implicit-qualifiers.cpp | 2 +- .../bugprone/easily-swappable-parameters-implicits.c | 2 +- .../easily-swappable-parameters-implicits.cpp | 2 +- .../bugprone/easily-swappable-parameters-len2.cpp | 2 +- .../bugprone/easily-swappable-parameters-len3.cpp | 2 +- .../easily-swappable-parameters-prefixsuffixname.cpp | 2 +- .../easily-swappable-parameters-qualifiermixing.cpp | 2 +- .../easily-swappable-parameters-relatedness.c | 4 ++-- .../easily-swappable-parameters-relatedness.cpp | 2 +- .../checkers/bugprone/easily-swappable-parameters.c | 4 ++-- 15 files changed, 29 insertions(+), 24 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index 3c718f1ddbe95..c426b32ccade3 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ 
b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -82,7 +82,7 @@ static constexpr bool DefaultModelImplicitConversions = true; /// used together. static constexpr bool DefaultSuppressParametersUsedTogether = true; -/// The default value for the NamePrefixSuffixSilenceDissimilarityTreshold +/// The default value for the NamePrefixSuffixSilenceDissimilarityThreshold /// check option. static constexpr std::size_t DefaultNamePrefixSuffixSilenceDissimilarityTreshold = 1; @@ -1435,7 +1435,7 @@ static MixableParameterRange modelMixingRange( StringRef PrevParamName = FD->getParamDecl(I - 1)->getName(); if (!ParamName.empty() && !PrevParamName.empty() && filter::prefixSuffixCoverUnderThreshold( - Check.NamePrefixSuffixSilenceDissimilarityTreshold, PrevParamName, + Check.NamePrefixSuffixSilenceDissimilarityThreshold, PrevParamName, ParamName)) { LLVM_DEBUG(llvm::dbgs() << "Parameter '" << ParamName << "' follows a pattern with previous parameter '" @@ -2108,8 +2108,8 @@ EasilySwappableParametersCheck::EasilySwappableParametersCheck( SuppressParametersUsedTogether( Options.get("SuppressParametersUsedTogether", DefaultSuppressParametersUsedTogether)), - NamePrefixSuffixSilenceDissimilarityTreshold( - Options.get("NamePrefixSuffixSilenceDissimilarityTreshold", + NamePrefixSuffixSilenceDissimilarityThreshold( + Options.get("NamePrefixSuffixSilenceDissimilarityThreshold", DefaultNamePrefixSuffixSilenceDissimilarityTreshold)) {} void EasilySwappableParametersCheck::storeOptions( @@ -2123,8 +2123,8 @@ void EasilySwappableParametersCheck::storeOptions( Options.store(Opts, "ModelImplicitConversions", ModelImplicitConversions); Options.store(Opts, "SuppressParametersUsedTogether", SuppressParametersUsedTogether); - Options.store(Opts, "NamePrefixSuffixSilenceDissimilarityTreshold", - NamePrefixSuffixSilenceDissimilarityTreshold); + Options.store(Opts, "NamePrefixSuffixSilenceDissimilarityThreshold", + NamePrefixSuffixSilenceDissimilarityThreshold); } void 
EasilySwappableParametersCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h index 055ae80dee8f3..9d1037ade727a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h @@ -54,7 +54,7 @@ class EasilySwappableParametersCheck : public ClangTidyCheck { /// either end for the report about the parameters to be silenced. /// E.g. the names "LHS" and "RHS" are 1-dissimilar suffixes of each other, /// while "Text1" and "Text2" are 1-dissimilar prefixes of each other. - const std::size_t NamePrefixSuffixSilenceDissimilarityTreshold; + const std::size_t NamePrefixSuffixSilenceDissimilarityThreshold; }; } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 23d757b5e6f2e..34091906cbff2 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -135,7 +135,7 @@ Improvements to clang-tidy :program:`clang-tidy-20`. Users should use the check-specific options of the same name instead. -- Improved :program:`run-clang-tidy.py` and :program:`clang-tidy-diff.py` +- Improved :program:`run-clang-tidy.py` and :program:`clang-tidy-diff.py` scripts by adding the `-hide-progress` option to suppress progress and informational messages. @@ -190,6 +190,11 @@ New check aliases Changes in existing checks ^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Improved :doc:`bugprone-easily-swappable-parameters + ` check by + correcting a spelling mistake on its option + ``NamePrefixSuffixSilenceDissimilarityTreshold``. + - Improved :doc:`bugprone-infinite-loop ` check by adding detection for variables introduced by structured bindings. @@ -213,8 +218,8 @@ Changes in existing checks tagged union respectively. 
- Improved :doc:`bugprone-unchecked-optional-access - ` check by supporting - ``NullableValue::makeValue`` and ``NullableValue::makeValueInplace`` to + ` check by supporting + ``NullableValue::makeValue`` and ``NullableValue::makeValueInplace`` to prevent false-positives for ``BloombergLP::bdlb::NullableValue`` type. - Improved :doc:`bugprone-unhandled-self-assignment diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/easily-swappable-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/easily-swappable-parameters.rst index 47970bfbbc400..a96d7f6015bda 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/easily-swappable-parameters.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/easily-swappable-parameters.rst @@ -169,7 +169,7 @@ noisiness. * Separate ``return`` statements return either of the parameters on different code paths. -.. option:: NamePrefixSuffixSilenceDissimilarityTreshold +.. option:: NamePrefixSuffixSilenceDissimilarityThreshold The number of characters two parameter names might be different on *either* the head or the tail end with the rest of the name the same so that the diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-ignore.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-ignore.cpp index 27104b93da0ac..be4db2e95ffca 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-ignore.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-ignore.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: 
bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- void ignoredUnnamed(int I, int, int) {} // NO-WARN: No >= 2 length of non-unnamed. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicit-qualifiers.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicit-qualifiers.cpp index dc89dc68f4538..cc8332635e107 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicit-qualifiers.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicit-qualifiers.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 1, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 1, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- void numericAndQualifierConversion(int I, const double CD) { numericAndQualifierConversion(CD, I); } diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.c index b9efc99c477b2..7d278647001d4 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.c @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 1, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: 
bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- void implicitDoesntBreakOtherStuff(int A, int B) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.cpp index 44ba5d5d9f590..7518e3fb031f5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-implicits.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 1, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- void implicitDoesntBreakOtherStuff(int A, int B) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len2.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len2.cpp index ac7cc7d9a7b7c..d933891072a54 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len2.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len2.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- namespace std { diff --git 
a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len3.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len3.cpp index 05900068e62f9..bf9ceb112a61f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len3.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-len3.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- int add(int Left, int Right) { return Left + Right; } // NO-WARN: Only 2 parameters. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-prefixsuffixname.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-prefixsuffixname.cpp index 72ce54e517304..00e54d0df690e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-prefixsuffixname.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-prefixsuffixname.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 1 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 1 \ // RUN: }}' -- namespace std { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-qualifiermixing.cpp 
b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-qualifiermixing.cpp index 2bfcefcc56fa0..61159bfa022fb 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-qualifiermixing.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-qualifiermixing.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 1, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- typedef int MyInt1; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.c index 45752de36a90c..0f325f0ab7ac5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.c @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 1, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- -Wno-strict-prototypes -x c // // RUN: %check_clang_tidy -std=c23-or-later %s bugprone-easily-swappable-parameters %t \ @@ -17,7 +17,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // 
RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 1, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- -Wno-strict-prototypes -x c int add(int X, int Y); diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.cpp index 9ede3dc5f8b8b..9214522070c3f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters-relatedness.cpp @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 1, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- namespace std { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters.c index be44cfc889ed0..25d27b3dba222 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/easily-swappable-parameters.c @@ -6,7 +6,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: 
bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- -Wno-strict-prototypes // // RUN: %check_clang_tidy -std=c23-or-later %s bugprone-easily-swappable-parameters %t \ @@ -17,7 +17,7 @@ // RUN: bugprone-easily-swappable-parameters.QualifiersMix: 0, \ // RUN: bugprone-easily-swappable-parameters.ModelImplicitConversions: 0, \ // RUN: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether: 0, \ -// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold: 0 \ +// RUN: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityThreshold: 0 \ // RUN: }}' -- -Wno-strict-prototypes #define bool _Bool From d976be0bb4f706a7f8d446cd5639651db66bf7a9 Mon Sep 17 00:00:00 2001 From: Timothy Hoffman <4001421+tim-hoffman@users.noreply.github.com> Date: Sat, 13 Sep 2025 13:19:34 +0100 Subject: [PATCH 222/734] [mlir] Documentation typo fixes (#135732) --- mlir/include/mlir/Transforms/DialectConversion.h | 2 +- mlir/include/mlir/Transforms/Passes.td | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index f8caae3ce9995..bfbe12d2a5668 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -223,7 +223,7 @@ class TypeConverter { } /// Register a conversion function for attributes within types. Type - /// converters may call this function in order to allow hoking into the + /// converters may call this function in order to allow hooking into the /// translation of attributes that exist within types. For example, a type /// converter for the `memref` type could use these conversions to convert /// memory spaces or layouts in an extensible way. 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td index 039fbaed47165..beb59784947c5 100644 --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -364,8 +364,8 @@ def Mem2Reg : Pass<"mem2reg"> { let description = [{ This pass removes loads out of and stores into a memory slot, and turns them into direct uses of SSA values. This is done generically using the - `PromoteAllocationOpInterface`, `PromoteOpInterface` and - `PromoteMemOpInterface` interfaces. + `PromotableAllocationOpInterface`, `PromotableOpInterface` and + `PromotableMemOpInterface` interfaces. This pass will attempt to compute which definitions of the content of the memory slot reach operations that use the memory slot pointer. It From d594a009c9557698c412d2a27d36b24120d390c2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 13 Sep 2025 05:36:56 -0700 Subject: [PATCH 223/734] [ADT] Simplify IntMask (NFC) (#158410) We can do the shift in uintptr_t without going through intptr_t. --- llvm/include/llvm/ADT/PointerIntPair.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h index e48e35d476c80..75e3a58e7ca61 100644 --- a/llvm/include/llvm/ADT/PointerIntPair.h +++ b/llvm/include/llvm/ADT/PointerIntPair.h @@ -180,7 +180,7 @@ struct PointerIntPairInfo { IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits, /// IntMask - This is the unshifted mask for valid bits of the int type. - IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1), + IntMask = ((uintptr_t)1 << IntBits) - 1, // ShiftedIntMask - This is the bits for the integer shifted in place. 
ShiftedIntMask = (uintptr_t)(IntMask << IntShift) From 3f4eb814452e243ceef6e04494f6525dc8ce13e6 Mon Sep 17 00:00:00 2001 From: 0xdeadbeaf <0xdeadbeaf@tutamail.com> Date: Sat, 13 Sep 2025 13:01:30 +0000 Subject: [PATCH 224/734] [cland-tidy][NFC] Fixed documentation for modernize-make-shared check (#158421) I was surprised to see a different value for the `MakeSmartPtrFunctionHeader` option of the `modernize-make-unique` and `modernize-make-shared` clang-tidy checks. See, respectively: [modernize-make-unique](https://github.com/llvm/llvm-project/blob/86397f55d5b3ac2ebefc91bbf1a7a6a23b44a3e2/clang-tools-extra/docs/clang-tidy/checks/modernize/make-unique.rst?plain=1#L40) and [modernize-make-shared](https://github.com/llvm/llvm-project/blob/86397f55d5b3ac2ebefc91bbf1a7a6a23b44a3e2/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst?plain=1#L40). From what I've read in the codebase, I believe both checks use the same default value for the `MakeSmartPtrFunctionHeader`: `` (from [here](https://github.com/llvm/llvm-project/blob/d976be0bb4f706a7f8d446cd5639651db66bf7a9/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp#L45-L46)). --- .../docs/clang-tidy/checks/modernize/make-shared.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst index 982138fc5e781..cd953e7ee394d 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst @@ -37,7 +37,7 @@ Options .. option:: MakeSmartPtrFunctionHeader A string specifying the corresponding header of make-shared-ptr function. - Default is `memory`. + Default is ``. .. 
option:: IncludeStyle From 5dbcbb65abc808b2909d693038fd7e461e282371 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 13 Sep 2025 07:50:37 -0700 Subject: [PATCH 225/734] [ADT] Store integers by value in Twine (NFC) (#158409) This patch stores integers by value in Twine for simplicity. I don't think there is a good reason to store char, unsigned, and int by value and all the other integers by pointers. --- llvm/include/llvm/ADT/Twine.h | 28 ++++++++++++---------------- llvm/lib/Support/Twine.cpp | 18 +++++++++--------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/ADT/Twine.h b/llvm/include/llvm/ADT/Twine.h index 4ed4898df5459..249fb0ad83808 100644 --- a/llvm/include/llvm/ADT/Twine.h +++ b/llvm/include/llvm/ADT/Twine.h @@ -150,11 +150,11 @@ class Twine { char character; unsigned int decUI; int decI; - const unsigned long *decUL; - const long *decL; - const unsigned long long *decULL; - const long long *decLL; - const uint64_t *uHex; + unsigned long decUL; + long decL; + unsigned long long decULL; + long long decLL; + uint64_t uHex; }; /// LHS - The prefix in the concatenation, which may be uninitialized for @@ -336,22 +336,18 @@ class Twine { explicit Twine(int Val) : LHSKind(DecIKind) { LHS.decI = Val; } /// Construct a twine to print \p Val as an unsigned decimal integer. - explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { - LHS.decUL = &Val; - } + explicit Twine(unsigned long Val) : LHSKind(DecULKind) { LHS.decUL = Val; } /// Construct a twine to print \p Val as a signed decimal integer. - explicit Twine(const long &Val) : LHSKind(DecLKind) { LHS.decL = &Val; } + explicit Twine(long Val) : LHSKind(DecLKind) { LHS.decL = Val; } /// Construct a twine to print \p Val as an unsigned decimal integer. 
- explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { - LHS.decULL = &Val; + explicit Twine(unsigned long long Val) : LHSKind(DecULLKind) { + LHS.decULL = Val; } /// Construct a twine to print \p Val as a signed decimal integer. - explicit Twine(const long long &Val) : LHSKind(DecLLKind) { - LHS.decLL = &Val; - } + explicit Twine(long long Val) : LHSKind(DecLLKind) { LHS.decLL = Val; } // FIXME: Unfortunately, to make sure this is as efficient as possible we // need extra binary constructors from particular types. We can't rely on @@ -389,9 +385,9 @@ class Twine { /// @{ // Construct a twine to print \p Val as an unsigned hexadecimal integer. - static Twine utohexstr(const uint64_t &Val) { + static Twine utohexstr(uint64_t Val) { Child LHS, RHS; - LHS.uHex = &Val; + LHS.uHex = Val; RHS.twine = nullptr; return Twine(LHS, UHexKind, RHS, EmptyKind); } diff --git a/llvm/lib/Support/Twine.cpp b/llvm/lib/Support/Twine.cpp index d6b48166fb0f6..9d449161c298b 100644 --- a/llvm/lib/Support/Twine.cpp +++ b/llvm/lib/Support/Twine.cpp @@ -88,19 +88,19 @@ void Twine::printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const { OS << Ptr.decI; break; case Twine::DecULKind: - OS << *Ptr.decUL; + OS << Ptr.decUL; break; case Twine::DecLKind: - OS << *Ptr.decL; + OS << Ptr.decL; break; case Twine::DecULLKind: - OS << *Ptr.decULL; + OS << Ptr.decULL; break; case Twine::DecLLKind: - OS << *Ptr.decLL; + OS << Ptr.decLL; break; case Twine::UHexKind: - OS.write_hex(*Ptr.uHex); + OS.write_hex(Ptr.uHex); break; } } @@ -144,16 +144,16 @@ void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr, NodeKind Kind) const { OS << "decI:\"" << Ptr.decI << "\""; break; case Twine::DecULKind: - OS << "decUL:\"" << *Ptr.decUL << "\""; + OS << "decUL:\"" << Ptr.decUL << "\""; break; case Twine::DecLKind: - OS << "decL:\"" << *Ptr.decL << "\""; + OS << "decL:\"" << Ptr.decL << "\""; break; case Twine::DecULLKind: - OS << "decULL:\"" << *Ptr.decULL << "\""; + OS << "decULL:\"" << 
Ptr.decULL << "\""; break; case Twine::DecLLKind: - OS << "decLL:\"" << *Ptr.decLL << "\""; + OS << "decLL:\"" << Ptr.decLL << "\""; break; case Twine::UHexKind: OS << "uhex:\"" << Ptr.uHex << "\""; From f94e36d2d398018d2e21a650301f27260d10fe26 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 13 Sep 2025 07:50:45 -0700 Subject: [PATCH 226/734] [llvm] Proofread TestingGuide.rst (#158411) --- llvm/docs/TestingGuide.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index f79d7eacd5b6c..6ab33383e929b 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -35,7 +35,7 @@ tests are contained inside the LLVM repository itself under ``llvm/unittests`` and ``llvm/test`` respectively and are expected to always pass. They should be run before every commit. -The whole programs tests are referred to as the "LLVM test suite" (or +The whole-program tests are referred to as the "LLVM test suite" (or "test-suite") and are in the ``test-suite`` `repository on GitHub `_. For historical reasons, these tests are also referred to as the "nightly @@ -49,7 +49,7 @@ Unit tests are written using `Google Test `_ and are located in the ``llvm/unittests`` directory. In general, unit tests are reserved for targeting the support library and other -generic data structure, we prefer relying on regression tests for testing +generic data structure. We prefer relying on regression tests for testing transformations and analysis on the IR. Regression tests @@ -69,7 +69,7 @@ piece of LLVM IR distilled from an actual application or benchmark. Testing Analysis ---------------- -An analysis is a pass that infer properties on some part of the IR and not +An analysis is a pass to infer properties on some part of the IR without transforming it. 
They are tested in general using the same infrastructure as the regression tests, by creating a separate "Printer" pass to consume the analysis result and print it on the standard output in a textual format suitable for @@ -90,7 +90,7 @@ flags, and then executed to capture the program output and timing information. The output of these programs is compared to a reference output to ensure that the program is being compiled correctly. -In addition to compiling and executing programs, whole program tests +In addition to compiling and executing programs, whole-program tests serve as a way of benchmarking LLVM performance, both in terms of the efficiency of the programs generated as well as the speed with which LLVM compiles, optimizes, and generates code. @@ -104,7 +104,7 @@ Debugging Information tests --------------------------- The test suite contains tests to check the quality of debugging information. -The tests are written in C based languages or in LLVM assembly language. +The tests are written in C-based languages or in LLVM assembly language. These tests are compiled and run under a debugger. The debugger output is checked to validate the debugging information. See ``README.txt`` in the @@ -139,7 +139,7 @@ To run all of the LLVM regression tests, use the ``check-llvm`` target: % make check-llvm In order to get reasonable testing performance, build LLVM and subprojects -in release mode, i.e. +in release mode, i.e., .. code-block:: bash @@ -159,7 +159,7 @@ variable to pass the required options to lit. For example, you can use: % make check LIT_OPTS="-v --vg --vg-leak" -to enable testing with valgrind and with leak checking enabled. +to enable testing with Valgrind and with leak checking enabled. To run individual tests or subsets of tests, you can use the ``llvm-lit`` script which is built as part of LLVM. 
For example, to run the @@ -202,13 +202,13 @@ The LLVM regression tests are driven by :program:`lit` and are located in the This directory contains a large array of small tests that exercise various features of LLVM and to ensure that regressions do not occur. -The directory is broken into several sub-directories, each focused on a +The directory is broken into several subdirectories, each focused on a particular area of LLVM. Writing new regression tests ---------------------------- -The regression test structure is very simple, but does require some +The regression test structure is very simple but does require some information to be set. This information is gathered via ``cmake`` and is written to a file, ``test/lit.site.cfg.py`` in the build directory. The ``llvm/test`` Makefile does this work for you. @@ -299,7 +299,7 @@ top to indicate that assertions were automatically generated. If you want to update assertions in an existing test case, pass the `-u` option which first checks the ``NOTE:`` line exists and matches the script name. -Sometimes a test absolutely depends on hand-written assertions and should not +Sometimes, a test absolutely depends on hand-written assertions and should not have assertions automatically generated. In that case, add the text ``NOTE: Do not autogenerate`` to the first line, and the scripts will skip that test. It is a good idea to explain why generated assertions will not work for the test @@ -428,7 +428,7 @@ For convenience, these are the contents: !llvm.ident = !{!0} !0 = metadata !{metadata !"Compiler V3"} -For symmetry reasons, ``ident.ll`` is just a dummy file that doesn't +For symmetry, ``ident.ll`` is just a dummy file that doesn't actually participate in the test besides holding the ``RUN:`` lines. .. note:: @@ -470,7 +470,7 @@ content. The script will prepare extra files with ``split-file``, invoke ``gen``, and then rewrite the part after ``gen`` with its stdout. 
-For convenience, if the test needs one single assembly file, you can also wrap +For convenience, if the test needs a single assembly file, you can also wrap ``gen`` and its required files with ``.ifdef`` and ``.endif``. Then you can skip ``split-file`` in ``RUN`` lines. @@ -869,7 +869,7 @@ Additional substitutions can be defined as follows: substitutions for all tests in a test directory. They do so by extending the substitution list, ``config.substitutions``. Each item in the list is a tuple consisting of a pattern and its replacement, which lit applies as plain text - (even if it contains sequences that python's ``re.sub`` considers to be + (even if it contains sequences that Python's ``re.sub`` considers to be escape sequences). - To define substitutions within a single test file, lit supports the ``DEFINE:`` and ``REDEFINE:`` directives, described in detail below. So that @@ -976,7 +976,7 @@ directives: colons. This syntax has a few advantages: - It is impossible for ``%{name}`` to contain sequences that are special in - python's ``re.sub`` patterns. Otherwise, attempting to specify + Python's ``re.sub`` patterns. Otherwise, attempting to specify ``%{name}`` as a substitution pattern in a lit configuration file could produce confusing expansions. - The braces help avoid the possibility that another substitution's pattern @@ -1039,7 +1039,7 @@ To address such use cases, lit configuration files support to specify the maximum number of passes through the substitution list. Thus, in the above example, setting the limit to 2 would cause lit to make a second pass that expands ``%{inner}`` in the ``RUN:`` line, and the output from the ``echo`` -command when then be: +command would then be: .. code-block:: shell @@ -1094,7 +1094,7 @@ a test fails. Finally, any line that contains "END." will cause the special interpretation of lines to terminate. This is generally done right after -the last RUN: line. This has two side effects: +the last ``RUN:`` line. 
This has two side effects: (a) it prevents special interpretation of lines that are part of the test program, not the instructions to the test case, and From 111de45e838a7d33637da8807212284c988f7fae Mon Sep 17 00:00:00 2001 From: Naveen Seth Hanig Date: Sat, 13 Sep 2025 21:50:05 +0530 Subject: [PATCH 227/734] [clang-scan-deps] Move command-line generation out of critical section (#158187) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first call to getBuildArguments() can be costly. Although the original author’s comment (from commit 3b1a686) states that it should be called outside the critical section, it is currently invoked within the locked region. This change moves it outside the critical section to match the original intent and reduce lock contention. --- clang/tools/clang-scan-deps/ClangScanDeps.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index f10b73278381b..0e2758d123edc 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -429,12 +429,12 @@ class FullDeps { auto Res = Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)}); NewMDs.push_back(&Res->second); } - // First call to \c getBuildArguments is somewhat expensive. Let's call it - // on the current thread (instead of the main one), and outside the - // critical section. - for (ModuleDeps *MD : NewMDs) - (void)MD->getBuildArguments(); } + // First call to \c getBuildArguments is somewhat expensive. Let's call it + // on the current thread (instead of the main one), and outside the + // critical section. 
+ for (ModuleDeps *MD : NewMDs) + (void)MD->getBuildArguments(); } bool roundTripCommand(ArrayRef ArgStrs, From ef7e03a2d1571e3c651fa9ccd205dbacf1a103c8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 13 Sep 2025 18:13:22 +0100 Subject: [PATCH 228/734] [VPlan] Limit ExtractLastElem fold to recipes guaranteed single-scalar. vputils::isSingleScalar(A) may return true to recipes that produce only a single scalar value, but they could still end up as vector instruction, because the recipe could not be converted to a single-scalar VPInstruction/VPReplicateRecipe. For now, only apply the fold for recipes guaranteed to produce a single value, i.e. single-scalar VPInstructions and VPReplicateRecipes. Fixes https://github.com/llvm/llvm-project/issues/158319. --- .../Transforms/Vectorize/VPlanTransforms.cpp | 8 +- ...first-order-recurrence-with-uniform-ops.ll | 248 ++++++++++++++++++ .../LoopVectorize/first-order-recurrence.ll | 113 -------- 3 files changed, 253 insertions(+), 116 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index fcd85ba9ab7f0..a193c438e7ea8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1272,9 +1272,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (match(Def, m_VPInstruction(m_VPValue(A))) && - vputils::isSingleScalar(A) && all_of(A->users(), [Def, A](VPUser *U) { - return U->usesScalars(A) || Def == U; - })) { + ((isa(A) && vputils::isSingleScalar(A)) || + (isa(A) && + cast(A)->isSingleScalar())) && + all_of(A->users(), + [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) { return Def->replaceAllUsesWith(A); } } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll new file mode 100644 index 0000000000000..5ce6d68e05edd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=UNROLL-NO-IC +; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=UNROLL-NO-VF +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s --check-prefix=SINK-AFTER + +; Test case for https://github.com/llvm/llvm-project/issues/95520. +define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst, i64 %n) { +; UNROLL-NO-IC-LABEL: define i32 @recurence_uniform_load( +; UNROLL-NO-IC-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) { +; UNROLL-NO-IC-NEXT: [[ENTRY:.*]]: +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; UNROLL-NO-IC: [[VECTOR_PH]]: +; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8 +; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; UNROLL-NO-IC-NEXT: br label %[[VECTOR_BODY:.*]] +; UNROLL-NO-IC: [[VECTOR_BODY]]: +; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; 
UNROLL-NO-IC: [[MIDDLE_BLOCK]]: +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; UNROLL-NO-IC: [[SCALAR_PH]]: +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]] +; UNROLL-NO-IC: [[LOOP]]: +; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[LOAD:%.*]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[ADD]] = add i64 [[PHI]], 1 +; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-IC-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], [[N]] +; UNROLL-NO-IC-NEXT: br i1 [[ICMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; UNROLL-NO-IC: [[EXIT]]: +; UNROLL-NO-IC-NEXT: [[RECUR_LCSSA:%.*]] = phi i32 [ [[RECUR]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: ret i32 [[RECUR_LCSSA]] +; +; UNROLL-NO-VF-LABEL: define i32 @recurence_uniform_load( +; UNROLL-NO-VF-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) { +; UNROLL-NO-VF-NEXT: [[ENTRY:.*]]: +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[N]], 1 +; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2 +; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; UNROLL-NO-VF: [[VECTOR_PH]]: +; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2 +; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; UNROLL-NO-VF-NEXT: br label %[[VECTOR_BODY:.*]] +; UNROLL-NO-VF: [[VECTOR_BODY]]: +; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i32, 
ptr [[SRC]], align 4 +; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; UNROLL-NO-VF: [[MIDDLE_BLOCK]]: +; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; UNROLL-NO-VF: [[SCALAR_PH]]: +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-VF-NEXT: br label %[[LOOP:.*]] +; UNROLL-NO-VF: [[LOOP]]: +; UNROLL-NO-VF-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[LOAD:%.*]], %[[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[ADD]] = add i64 [[PHI]], 1 +; UNROLL-NO-VF-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-VF-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], [[N]] +; UNROLL-NO-VF-NEXT: br i1 [[ICMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; UNROLL-NO-VF: [[EXIT]]: +; UNROLL-NO-VF-NEXT: [[RECUR_LCSSA:%.*]] = phi i32 [ [[RECUR]], %[[LOOP]] ], [ [[TMP0]], %[[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: ret i32 [[RECUR_LCSSA]] +; +; SINK-AFTER-LABEL: define i32 @recurence_uniform_load( +; SINK-AFTER-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) { +; SINK-AFTER-NEXT: [[ENTRY:.*]]: +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; SINK-AFTER: [[VECTOR_PH]]: +; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 
[[TMP0]], [[N_MOD_VF]] +; SINK-AFTER-NEXT: br label %[[VECTOR_BODY:.*]] +; SINK-AFTER: [[VECTOR_BODY]]: +; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 4 +; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SINK-AFTER: [[MIDDLE_BLOCK]]: +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; SINK-AFTER: [[SCALAR_PH]]: +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; SINK-AFTER-NEXT: br label %[[LOOP:.*]] +; SINK-AFTER: [[LOOP]]: +; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; SINK-AFTER-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[LOAD:%.*]], %[[LOOP]] ] +; SINK-AFTER-NEXT: [[ADD]] = add i64 [[PHI]], 1 +; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 +; SINK-AFTER-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], [[N]] +; SINK-AFTER-NEXT: br i1 [[ICMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; SINK-AFTER: [[EXIT]]: +; SINK-AFTER-NEXT: [[RECUR_LCSSA:%.*]] = phi i32 [ [[RECUR]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: ret i32 [[RECUR_LCSSA]] +; +entry: + br label %loop + +loop: + %phi = phi i64 [ 0, %entry ], [ %add, %loop ] + %recur = phi i32 [ 0, %entry ], [ %load, %loop ] + %add = add i64 %phi, 1 + %load = load i32, ptr %src, align 4 + %icmp = icmp ult i64 %phi, %n + br i1 %icmp, label %loop, label %exit + +exit: + ret i32 %recur +} + +; Test 
for https://github.com/llvm/llvm-project/issues/158319. The recurrence +; phi can be removed. +define i16 @for_phi_removed(ptr %src) { +; UNROLL-NO-IC-LABEL: define i16 @for_phi_removed( +; UNROLL-NO-IC-SAME: ptr [[SRC:%.*]]) { +; UNROLL-NO-IC-NEXT: [[ENTRY:.*]]: +; UNROLL-NO-IC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; UNROLL-NO-IC: [[VECTOR_PH]]: +; UNROLL-NO-IC-NEXT: br label %[[VECTOR_BODY:.*]] +; UNROLL-NO-IC: [[VECTOR_BODY]]: +; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104 +; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NO-IC: [[MIDDLE_BLOCK]]: +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 +; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH]] +; UNROLL-NO-IC: [[SCALAR_PH]]: +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 104, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]] +; UNROLL-NO-IC: [[LOOP]]: +; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[P:%.*]] = 
phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 +; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 +; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 111 +; UNROLL-NO-IC-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NO-IC: [[EXIT]]: +; UNROLL-NO-IC-NEXT: [[P_LCSSA:%.*]] = phi i16 [ [[P]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: ret i16 [[P_LCSSA]] +; +; UNROLL-NO-VF-LABEL: define i16 @for_phi_removed( +; UNROLL-NO-VF-SAME: ptr [[SRC:%.*]]) { +; UNROLL-NO-VF-NEXT: [[ENTRY:.*]]: +; UNROLL-NO-VF-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; UNROLL-NO-VF: [[VECTOR_PH]]: +; UNROLL-NO-VF-NEXT: br label %[[VECTOR_BODY:.*]] +; UNROLL-NO-VF: [[VECTOR_BODY]]: +; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0 +; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 110 +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NO-VF: [[MIDDLE_BLOCK]]: +; UNROLL-NO-VF-NEXT: br label %[[SCALAR_PH]] +; UNROLL-NO-VF: [[SCALAR_PH]]: +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 110, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; UNROLL-NO-VF-NEXT: br label %[[LOOP:.*]] +; UNROLL-NO-VF: [[LOOP]]: +; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ 
[[IV_NEXT:%.*]], %[[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[P:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 +; UNROLL-NO-VF-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 +; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; UNROLL-NO-VF-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 111 +; UNROLL-NO-VF-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NO-VF: [[EXIT]]: +; UNROLL-NO-VF-NEXT: [[P_LCSSA:%.*]] = phi i16 [ [[P]], %[[LOOP]] ] +; UNROLL-NO-VF-NEXT: ret i16 [[P_LCSSA]] +; +; SINK-AFTER-LABEL: define i16 @for_phi_removed( +; SINK-AFTER-SAME: ptr [[SRC:%.*]]) { +; SINK-AFTER-NEXT: [[ENTRY:.*]]: +; SINK-AFTER-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; SINK-AFTER: [[VECTOR_PH]]: +; SINK-AFTER-NEXT: br label %[[VECTOR_BODY:.*]] +; SINK-AFTER: [[VECTOR_BODY]]: +; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4 +; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer +; SINK-AFTER-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108 +; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SINK-AFTER: [[MIDDLE_BLOCK]]: +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 +; 
SINK-AFTER-NEXT: br label %[[SCALAR_PH]] +; SINK-AFTER: [[SCALAR_PH]]: +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 108, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; SINK-AFTER-NEXT: br label %[[LOOP:.*]] +; SINK-AFTER: [[LOOP]]: +; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 +; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 +; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 +; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; SINK-AFTER-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 111 +; SINK-AFTER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; SINK-AFTER: [[EXIT]]: +; SINK-AFTER-NEXT: [[P_LCSSA:%.*]] = phi i16 [ [[P]], %[[LOOP]] ] +; SINK-AFTER-NEXT: ret i16 [[P_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] + %p = phi i16 [ 0, %entry ] , [ %sel, %loop ] + %l = load i32, ptr %src, align 4 + %c = icmp eq i32 %l, 0 + %sel = select i1 %c, i16 1, i16 0 + %iv.next = add i16 %iv, 1 + %ec = icmp eq i16 %iv.next, 111 + br i1 %ec, label %exit, label %loop + +exit: + ret i16 %p +} diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 10cbf66c783db..7e288ab0eb76d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -3619,117 +3619,4 @@ for.end: ret void } -; Test case for https://github.com/llvm/llvm-project/issues/95520. 
-define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst, i64 %n) { -; UNROLL-NO-IC-LABEL: @recurence_uniform_load( -; UNROLL-NO-IC-NEXT: entry: -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; UNROLL-NO-IC: vector.ph: -; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8 -; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] -; UNROLL-NO-IC: vector.body: -; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC:%.*]], align 4 -; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] -; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] -; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[ADD]] = add i64 [[PHI]], 1 -; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 -; UNROLL-NO-IC-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[ICMP]], label 
[[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]] -; UNROLL-NO-IC: exit: -; UNROLL-NO-IC-NEXT: ret i32 0 -; -; UNROLL-NO-VF-LABEL: @recurence_uniform_load( -; UNROLL-NO-VF-NEXT: entry: -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[N:%.*]], 1 -; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; UNROLL-NO-VF: vector.ph: -; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2 -; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] -; UNROLL-NO-VF: vector.body: -; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4 -; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] -; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] -; UNROLL-NO-VF: loop: -; UNROLL-NO-VF-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[ADD]] = add i64 [[PHI]], 1 -; UNROLL-NO-VF-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 -; UNROLL-NO-VF-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], [[N]] -; 
UNROLL-NO-VF-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]] -; UNROLL-NO-VF: exit: -; UNROLL-NO-VF-NEXT: ret i32 0 -; -; SINK-AFTER-LABEL: @recurence_uniform_load( -; SINK-AFTER-NEXT: entry: -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 -; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; SINK-AFTER: vector.ph: -; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 -; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] -; SINK-AFTER: vector.body: -; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC:%.*]], align 4 -; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] -; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: br label [[LOOP:%.*]] -; SINK-AFTER: loop: -; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[ADD]] = add i64 [[PHI]], 1 -; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 -; SINK-AFTER-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], [[N]] -; SINK-AFTER-NEXT: br 
i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]] -; SINK-AFTER: exit: -; SINK-AFTER-NEXT: ret i32 0 -; -entry: - br label %loop - -loop: - %phi = phi i64 [ 0, %entry ], [ %add, %loop ] - %recur = phi i32 [ 0, %entry ], [ %load, %loop ] - %add = add i64 %phi, 1 - %load = load i32, ptr %src, align 4 - %icmp = icmp ult i64 %phi, %n - br i1 %icmp, label %loop, label %exit - -exit: - ret i32 0 -} - !2 = !{!"branch_weights", i32 1, i32 1} From a7c963ab763c98188010cd654902cee4734249ee Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 13 Sep 2025 20:51:51 +0300 Subject: [PATCH 229/734] [libc++][test] update `MinSequenceContainer.h` to make one more test pass on MSVC STL (#158344) Continues #140287 `from_range_t` constructor is needed by MSVC STL to pass `std/containers/container.adaptors/flat.set/flat.set.cons/range.pass.cpp`. The rest are added to complete the container according to https://github.com/llvm/llvm-project/pull/140287#pullrequestreview-2848573089. --- libcxx/test/support/MinSequenceContainer.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/libcxx/test/support/MinSequenceContainer.h b/libcxx/test/support/MinSequenceContainer.h index 9af5847f08871..8433ebb39160c 100644 --- a/libcxx/test/support/MinSequenceContainer.h +++ b/libcxx/test/support/MinSequenceContainer.h @@ -29,6 +29,12 @@ struct MinSequenceContainer { template explicit TEST_CONSTEXPR_CXX20 MinSequenceContainer(It first, It last) : data_(first, last) {} TEST_CONSTEXPR_CXX20 MinSequenceContainer(std::initializer_list il) : data_(il) {} + template + TEST_CONSTEXPR_CXX20 MinSequenceContainer(std::from_range_t, Range&& rg) + : data_(std::from_range, std::forward(rg)) {} + TEST_CONSTEXPR_CXX20 MinSequenceContainer(size_type n, T value) : data_(n, value) {} + + MinSequenceContainer& operator=(std::initializer_list il) { data_ = il; } template TEST_CONSTEXPR_CXX20 void assign(It first, It last) { @@ -36,6 +42,10 @@ struct MinSequenceContainer { } 
TEST_CONSTEXPR_CXX20 void assign(std::initializer_list il) { data_.assign(il); } TEST_CONSTEXPR_CXX20 void assign(size_type n, value_type t) { data_.assign(n, t); } + template + TEST_CONSTEXPR_CXX20 void assign_range(Range&& rg) { + data_.assign_range(std::forward(rg)); + } TEST_CONSTEXPR_CXX20 iterator begin() { return iterator(data_.data()); } TEST_CONSTEXPR_CXX20 const_iterator begin() const { return const_iterator(data_.data()); } TEST_CONSTEXPR_CXX20 const_iterator cbegin() const { return const_iterator(data_.data()); } @@ -55,6 +65,14 @@ struct MinSequenceContainer { return from_vector_iterator(data_.insert(to_vector_iterator(p), std::move(value))); } + TEST_CONSTEXPR_CXX20 iterator insert(const_iterator p, size_type n, T value) { + return from_vector_iterator(data_.insert(to_vector_iterator(p), n, value)); + } + + TEST_CONSTEXPR_CXX20 iterator insert(const_iterator p, std::initializer_list il) { + return from_vector_iterator(data_.insert(to_vector_iterator(p), il)); + } + template TEST_CONSTEXPR_CXX20 iterator insert_range(const_iterator p, Range&& rg) { return from_vector_iterator(data_.insert_range(to_vector_iterator(p), std::forward(rg))); From 4b82db90b8b6b0ff6f6d7238f7f11858b2f1de51 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sat, 13 Sep 2025 12:39:12 -0700 Subject: [PATCH 230/734] [Github] Add CodeQL Action to check Github Actions Workflows (#158394) This should point out obvious security issues with our Github Actions workflows. 
--- .github/workflows/gha-codeql.yml | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/gha-codeql.yml diff --git a/.github/workflows/gha-codeql.yml b/.github/workflows/gha-codeql.yml new file mode 100644 index 0000000000000..5a7c79d021ade --- /dev/null +++ b/.github/workflows/gha-codeql.yml @@ -0,0 +1,35 @@ +name: Github Actions CodeQL + +permissions: + contents: read + +on: + pull_request: + branches: + - main + schedule: + - cron: '30 0 * * *' + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: true + +jobs: + codeql: + name: 'Github Actions CodeQL' + runs-on: ubuntu-24.04 + permissions: + security-events: write + steps: + - name: Checkout LLVM + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + sparse-checkout: | + .github/ + - name: Initialize CodeQL + uses: github/codeql-action/init@192325c86100d080feab897ff886c34abd4c83a3 # v3.30.3 + with: + languages: actions + queries: security-extended + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@192325c86100d080feab897ff886c34abd4c83a3 # v3.30.3 From 30e9cbacab5b474de89992851f126fff300c1ab7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 13 Sep 2025 20:41:43 +0100 Subject: [PATCH 231/734] [VPlan] Move logic to compute scalarization overhead to cost helper(NFC) Extract the logic to compute the scalarization overhead to a helper for easy re-use in the future. 
--- llvm/lib/Transforms/Vectorize/VPlan.cpp | 30 +++++++++++++++++++ llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 7 +++++ .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 29 ++---------------- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index e3244623ee968..30a3a01ddd949 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1747,3 +1747,33 @@ VPCostContext::getOperandInfo(VPValue *V) const { return TTI::getOperandInfo(V->getLiveInIRValue()); } + +InstructionCost VPCostContext::getScalarizationOverhead( + Type *ResultTy, ArrayRef Operands, ElementCount VF) { + if (VF.isScalar()) + return 0; + + InstructionCost ScalarizationCost = 0; + // Compute the cost of scalarizing the result if needed. + if (!ResultTy->isVoidTy()) { + for (Type *VectorTy : + to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) { + ScalarizationCost += TTI.getScalarizationOverhead( + cast(VectorTy), APInt::getAllOnes(VF.getFixedValue()), + /*Insert=*/true, + /*Extract=*/false, CostKind); + } + } + // Compute the cost of scalarizing the operands, skipping ones that do not + // require extraction/scalarization and do not incur any overhead. + SmallPtrSet UniqueOperands; + SmallVector Tys; + for (auto *Op : Operands) { + if (Op->isLiveIn() || isa(Op) || + !UniqueOperands.insert(Op).second) + continue; + Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF)); + } + return ScalarizationCost + + TTI.getOperandsScalarizationOverhead(Tys, CostKind); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 5ad2ac6b61e05..fe59774b7c838 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -371,6 +371,13 @@ struct VPCostContext { /// legacy cost model for \p VF. Only used to check for additional VPlan /// simplifications. 
bool isLegacyUniformAfterVectorization(Instruction *I, ElementCount VF) const; + + /// Estimate the overhead of scalarizing a recipe with result type \p ResultTy + /// and \p Operands with \p VF. This is a convenience wrapper for the + /// type-based getScalarizationOverhead API. + InstructionCost getScalarizationOverhead(Type *ResultTy, + ArrayRef Operands, + ElementCount VF); }; /// This class can be used to assign names to VPValues. For VPValues without diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 11846f863a3fa..c6273074778d1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3132,33 +3132,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, if (VF.isScalable()) return InstructionCost::getInvalid(); - // Compute the cost of scalarizing the result and operands if needed. - InstructionCost ScalarizationCost = 0; - if (VF.isVector()) { - if (!ResultTy->isVoidTy()) { - for (Type *VectorTy : - to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) { - ScalarizationCost += Ctx.TTI.getScalarizationOverhead( - cast(VectorTy), APInt::getAllOnes(VF.getFixedValue()), - /*Insert=*/true, - /*Extract=*/false, Ctx.CostKind); - } - } - // Skip operands that do not require extraction/scalarization and do not - // incur any overhead. 
- SmallPtrSet UniqueOperands; - Tys.clear(); - for (auto *Op : ArgOps) { - if (Op->isLiveIn() || isa(Op) || - !UniqueOperands.insert(Op).second) - continue; - Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF)); - } - ScalarizationCost += - Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind); - } - - return ScalarCallCost * VF.getFixedValue() + ScalarizationCost; + return ScalarCallCost * VF.getFixedValue() + + Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF); } case Instruction::Add: case Instruction::Sub: From 9b70b84b89f0de2dc4641da02fe54a6a1ef46e6e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 13 Sep 2025 13:50:33 -0700 Subject: [PATCH 232/734] [ADT] Implement EnumeratedArray with std::array (NFC) (#158407) EnumeratedArray provides an std::array-like interface except that you access the array with an enum index. Now, the problem is that because the underlying array is implemented as a C array, we have to mirror what std::array would do: iterator end() { return begin() + size(); } reverse_iterator rbegin() { return reverse_iterator(end()); } This patch switches to the std::array. This way, we just have to "forward" calls to begin, end, rbegin, rend, etc. Also, we benefit from std::array::fill in one of the constructors. 
--- llvm/include/llvm/ADT/EnumeratedArray.h | 50 ++++++++++--------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/llvm/include/llvm/ADT/EnumeratedArray.h b/llvm/include/llvm/ADT/EnumeratedArray.h index 93e1327306175..2fe6be434f11b 100644 --- a/llvm/include/llvm/ADT/EnumeratedArray.h +++ b/llvm/include/llvm/ADT/EnumeratedArray.h @@ -15,8 +15,9 @@ #ifndef LLVM_ADT_ENUMERATEDARRAY_H #define LLVM_ADT_ENUMERATEDARRAY_H +#include "llvm/ADT/STLExtras.h" +#include #include -#include namespace llvm { @@ -24,12 +25,15 @@ template (LargestEnum)> class EnumeratedArray { -public: - using iterator = ValueType *; - using const_iterator = const ValueType *; + static_assert(Size > 0); + using ArrayTy = std::array; + ArrayTy Underlying; - using const_reverse_iterator = std::reverse_iterator; - using reverse_iterator = std::reverse_iterator; +public: + using iterator = typename ArrayTy::iterator; + using const_iterator = typename ArrayTy::const_iterator; + using reverse_iterator = typename ArrayTy::reverse_iterator; + using const_reverse_iterator = typename ArrayTy::const_reverse_iterator; using value_type = ValueType; using reference = ValueType &; @@ -38,16 +42,10 @@ class EnumeratedArray { using const_pointer = const ValueType *; EnumeratedArray() = default; - EnumeratedArray(ValueType V) { - for (IndexType IX = 0; IX < Size; ++IX) { - Underlying[IX] = V; - } - } + EnumeratedArray(ValueType V) { Underlying.fill(V); } EnumeratedArray(std::initializer_list Init) { assert(Init.size() == Size && "Incorrect initializer size"); - for (IndexType IX = 0; IX < Size; ++IX) { - Underlying[IX] = *(Init.begin() + IX); - } + llvm::copy(Init, Underlying.begin()); } const ValueType &operator[](Enumeration Index) const { @@ -62,23 +60,15 @@ class EnumeratedArray { IndexType size() const { return Size; } bool empty() const { return size() == 0; } - iterator begin() { return Underlying; } - const_iterator begin() const { return Underlying; } - - iterator end() { return 
begin() + size(); } - const_iterator end() const { return begin() + size(); } - - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { - return const_reverse_iterator(end()); - } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { - return const_reverse_iterator(begin()); - } + iterator begin() { return Underlying.begin(); } + const_iterator begin() const { return Underlying.begin(); } + iterator end() { return Underlying.end(); } + const_iterator end() const { return Underlying.end(); } -private: - ValueType Underlying[Size]; + reverse_iterator rbegin() { return Underlying.rbegin(); } + const_reverse_iterator rbegin() const { return Underlying.rbegin(); } + reverse_iterator rend() { return Underlying.rend(); } + const_reverse_iterator rend() const { return Underlying.rend(); } }; } // namespace llvm From c70b9c8622e683e46b29609dd1e1b31d8e5025c8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 13 Sep 2025 13:50:41 -0700 Subject: [PATCH 233/734] [ADT] Use range-based for loops in SparseBitVector.h (NFC) (#158408) --- llvm/include/llvm/ADT/SparseBitVector.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ADT/SparseBitVector.h b/llvm/include/llvm/ADT/SparseBitVector.h index 7151af6146e6e..90e2336f9f488 100644 --- a/llvm/include/llvm/ADT/SparseBitVector.h +++ b/llvm/include/llvm/ADT/SparseBitVector.h @@ -119,8 +119,8 @@ template struct SparseBitVectorElement { size_type count() const { unsigned NumBits = 0; - for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) - NumBits += llvm::popcount(Bits[i]); + for (BitWord Bit : Bits) + NumBits += llvm::popcount(Bit); return NumBits; } @@ -799,11 +799,8 @@ class SparseBitVector { unsigned count() const { unsigned BitCount = 0; - for (ElementListConstIter Iter = Elements.begin(); - Iter != Elements.end(); - ++Iter) - BitCount += Iter->count(); - + for (const 
SparseBitVectorElement &Elem : Elements) + BitCount += Elem.count(); return BitCount; } From de7e3a589525179f3b02b84b194aac6cf581425c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 13 Sep 2025 22:09:05 +0100 Subject: [PATCH 234/734] [VPlan] Compute cost of scalar (U|S)Div, (U|S)Rem in computeCost (NFCI). Directly compute the cost of UDiv, SDiv, URem, SRem in VPlan. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index c6273074778d1..f5c8cf106d8b5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3151,9 +3151,23 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: + case Instruction::Select: return *getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx) * (isSingleScalar() ? 
1 : VF.getFixedValue()); + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: { + InstructionCost ScalarCost = *getCostForRecipeWithOpcode( + getOpcode(), ElementCount::getFixed(1), Ctx); + if (isSingleScalar()) + return ScalarCost; + + return ScalarCost * VF.getFixedValue() + + Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this), + to_vector(operands()), VF); + } case Instruction::Load: case Instruction::Store: { if (isSingleScalar()) { From bf156a2087f4a6d251d57369f6bd363cfa7adb41 Mon Sep 17 00:00:00 2001 From: Tomohiro Kashiwada Date: Sun, 14 Sep 2025 06:39:33 +0900 Subject: [PATCH 235/734] [Clang](NFC) Add coverage for VTable debug info (#151818) Add test for: - shows effect of inlining member functions - shows effect of template instantiation methods Make cleaner a bit: - drops unnecessary `REQUIRES` clause - uses `%clang_cc1` instead of `%clang -Xclang` - uses `-O0 -disable-llvm-passes` and `-O1 -disable-llvm-passes` because the optimization level affects the linkage of VTables. 
--- clang/test/DebugInfo/CXX/vtable-external.cpp | 108 ++++++++++++++++++ .../CXX/vtable-inheritance-diamond.cpp | 13 +-- .../CXX/vtable-inheritance-multiple.cpp | 11 +- .../CXX/vtable-inheritance-simple-main.cpp | 34 +++--- .../CXX/vtable-inheritance-simple.cpp | 13 ++- .../CXX/vtable-inheritance-virtual.cpp | 13 +-- .../CXX/vtable-template-instantiation.cpp | 84 ++++++++++++++ 7 files changed, 232 insertions(+), 44 deletions(-) create mode 100644 clang/test/DebugInfo/CXX/vtable-external.cpp create mode 100644 clang/test/DebugInfo/CXX/vtable-template-instantiation.cpp diff --git a/clang/test/DebugInfo/CXX/vtable-external.cpp b/clang/test/DebugInfo/CXX/vtable-external.cpp new file mode 100644 index 0000000000000..b5b34c4123e3b --- /dev/null +++ b/clang/test/DebugInfo/CXX/vtable-external.cpp @@ -0,0 +1,108 @@ +// For the `CInlined` struct, where all member functions are inlined, we check the following cases: +// - If the definition of its destructor is visible: +// * The vtable is generated with a COMDAT specifier +// * Its '_vtable$' is generated +// - Otherwise: +// * The vtable is declared +// * Its '_vtable$' is NOT generated +// +// For the `CNoInline` struct, where member functions are defined as non-inline, we check the following: +// - Regardless of whether the definition of its destructor is visible or not: +// * The vtable is generated +// * Its '_vtable$' is generated +// +// For the `CNoFnDef` struct, where member functions are declared only, we check the following: +// - Regardless of whether the definition of its destructor is visible or not: +// # when non-optimized: +// * The vtable is declared +// * Its '_vtable$' is NOT generated +// # when optimized even if no LLVM passes: +// * The vtable is declared as `available_externally` (which is potentially turned into `external` by LLVM passes) +// * Its '_vtable$' is generated + +struct CInlined { + virtual void f1() noexcept {} + virtual void f2() noexcept {} + virtual ~CInlined() noexcept; +}; +#ifndef
NO_DTOR_BODY +inline CInlined::~CInlined() noexcept {} +#endif + +struct CNoInline { + virtual void g1() noexcept; + virtual void g2() noexcept; + virtual ~CNoInline() noexcept; +}; + +void CNoInline::g1() noexcept {} +void CNoInline::g2() noexcept {} +#ifndef NO_DTOR_BODY +CNoInline::~CNoInline() noexcept {} +#endif + +struct CNoFnDef { + virtual void h1() noexcept; + virtual void h2() noexcept; + virtual ~CNoFnDef() noexcept; +}; + +#ifndef NO_DTOR_BODY +CNoFnDef::~CNoFnDef() noexcept {} +#endif + +int main() { + CInlined Inlined; + CNoInline NoInline; + CNoFnDef NoFnDef; + + return 0; +} + +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - | FileCheck %s -check-prefixes CHECK-HAS-DTOR,CHECK-HAS-DTOR-O0 +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - | FileCheck %s -check-prefixes CHECK-HAS-DTOR,CHECK-HAS-DTOR-O1 +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes -DNO_DTOR_BODY %s -o - | FileCheck %s -check-prefixes CHECK-NO-DTOR,CHECK-NO-DTOR-O0 +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes -DNO_DTOR_BODY %s -o - | FileCheck %s -check-prefixes CHECK-NO-DTOR,CHECK-NO-DTOR-O1 + +// CHECK-HAS-DTOR: $_ZTV8CInlined = comdat any +// CHECK-HAS-DTOR-NOT: $_ZTV9CNoInline +// CHECK-HAS-DTOR-NOT: $_ZTV8CNoFnDef + +// CHECK-HAS-DTOR-DAG: @_ZTV8CInlined = linkonce_odr {{.*}}constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, comdat, align 8, !dbg [[INLINED_VTABLE_VAR:![0-9]+]] +// CHECK-HAS-DTOR-DAG: @_ZTV9CNoInline = {{.*}}constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, align 8, !dbg [[NOINLINE_VTABLE_VAR:![0-9]+]] +// CHECK-HAS-DTOR-O0-DAG: @_ZTV8CNoFnDef = external {{.*}}constant {{{ \[[^]]*\] }}}, align 8{{$}} +// CHECK-HAS-DTOR-O1-DAG: @_ZTV8CNoFnDef = available_externally 
{{.*}}constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, align 8, !dbg [[NOFNDEF_VTABLE_VAR:![0-9]+]] + +// CHECK-HAS-DTOR: !llvm.dbg.cu + +// CHECK-HAS-DTOR-DAG: [[INLINED_VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV8CInlined" +// CHECK-HAS-DTOR-DAG: [[INLINED_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[INLINED_VTABLE]], expr: !DIExpression()) +// CHECK-HAS-DTOR-DAG: [[INLINED:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CInlined" +// CHECK-HAS-DTOR-DAG: !DIDerivedType(tag: DW_TAG_variable, name: "_vtable$", scope: [[INLINED]], file: {{.*}}, baseType: {{![0-9]+}}, flags: DIFlagPrivate | DIFlagArtificial | DIFlagStaticMember) + +// CHECK-HAS-DTOR-DAG: [[NOINLINE_VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV9CNoInline" +// CHECK-HAS-DTOR-DAG: [[NOINLINE_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[NOINLINE_VTABLE]], expr: !DIExpression()) +// CHECK-HAS-DTOR-DAG: [[NOINLINE:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CNoInline" +// CHECK-HAS-DTOR-DAG: !DIDerivedType(tag: DW_TAG_variable, name: "_vtable$", scope: [[NOINLINE]], file: {{.*}}, baseType: {{![0-9]+}}, flags: DIFlagPrivate | DIFlagArtificial | DIFlagStaticMember) + +// CHECK-HAS-DTOR-O1-DAG: [[NOFNDEF_VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV8CNoFnDef" +// CHECK-HAS-DTOR-O1-DAG: [[NOFNDEF_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[NOFNDEF_VTABLE]], expr: !DIExpression()) + +// CHECK-NO-DTOR-NOT: $_ZTV8CInlined +// CHECK-NO-DTOR-NOT: $_ZTV9CNoInline +// CHECK-NO-DTOR-NOT: $_ZTV8CNoFnDef + +// CHECK-NO-DTOR-DAG: @_ZTV8CInlined = external {{.*}}constant {{.*}}, align 8{{$}} +// CHECK-NO-DTOR-DAG: @_ZTV9CNoInline = {{.*}}constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, align 8, !dbg [[NOINLINE_VTABLE_VAR:![0-9]+]] +// CHECK-NO-DTOR-O0-DAG: @_ZTV8CNoFnDef = external {{.*}}constant {{{ \[[^]]*\] }}}, align 8{{$}} +// 
CHECK-NO-DTOR-O1-DAG: @_ZTV8CNoFnDef = available_externally {{.*}}constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, align 8, !dbg [[NOFNDEF_VTABLE_VAR:![0-9]+]] + +// CHECK-NO-DTOR: !llvm.dbg.cu + +// CHECK-NO-DTOR-DAG: [[NOINLINE_VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV9CNoInline" +// CHECK-NO-DTOR-DAG: [[NOINLINE_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[NOINLINE_VTABLE]], expr: !DIExpression()) +// CHECK-NO-DTOR-DAG: [[NOINLINE:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CNoInline" +// CHECK-NO-DTOR-DAG: !DIDerivedType(tag: DW_TAG_variable, name: "_vtable$", scope: [[NOINLINE]], file: {{.*}}, baseType: {{![0-9]+}}, flags: DIFlagPrivate | DIFlagArtificial | DIFlagStaticMember) + +// CHECK-NO-DTOR-O1-DAG: [[NOFNDEF_VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV8CNoFnDef" +// CHECK-NO-DTOR-O1-DAG: [[NOFNDEF_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[NOFNDEF_VTABLE]], expr: !DIExpression()) diff --git a/clang/test/DebugInfo/CXX/vtable-inheritance-diamond.cpp b/clang/test/DebugInfo/CXX/vtable-inheritance-diamond.cpp index 5ed1353eebb10..5bf7dc15c46d0 100644 --- a/clang/test/DebugInfo/CXX/vtable-inheritance-diamond.cpp +++ b/clang/test/DebugInfo/CXX/vtable-inheritance-diamond.cpp @@ -1,5 +1,3 @@ -// REQUIRES: target={{x86_64.*-linux.*}} - // Diamond inheritance case: // For CBase, CLeft, CRight and CDerived we check: // - Generation of their vtables (including attributes). 
@@ -44,17 +42,18 @@ int main() { return 0; } -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -S -g %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - | FileCheck %s // CHECK: $_ZTVN3NSP5CBaseE = comdat any // CHECK: $_ZTVN5NSP_15CLeftE = comdat any // CHECK: $_ZTVN5NSP_26CRightE = comdat any // CHECK: $_ZTV8CDerived = comdat any -// CHECK: @_ZTVN3NSP5CBaseE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTVN5NSP_15CLeftE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[LEFT_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTVN5NSP_26CRightE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[RIGHT_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTV8CDerived = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN3NSP5CBaseE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN5NSP_15CLeftE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[LEFT_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN5NSP_26CRightE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[RIGHT_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTV8CDerived = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] // CHECK: [[BASE_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[BASE_VTABLE:![0-9]*]], expr: !DIExpression()) // CHECK-NEXT: [[BASE_VTABLE]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTVN3NSP5CBaseE" diff --git 
a/clang/test/DebugInfo/CXX/vtable-inheritance-multiple.cpp b/clang/test/DebugInfo/CXX/vtable-inheritance-multiple.cpp index 23973a35d0e17..3b7e3a74f8eac 100644 --- a/clang/test/DebugInfo/CXX/vtable-inheritance-multiple.cpp +++ b/clang/test/DebugInfo/CXX/vtable-inheritance-multiple.cpp @@ -1,5 +1,3 @@ -// REQUIRES: target={{x86_64.*-linux.*}} - // Multiple inheritance case: // For CBaseOne, CBaseTwo and CDerived we check: // - Generation of their vtables (including attributes). @@ -38,15 +36,16 @@ int main() { return 0; } -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -S -g %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - | FileCheck %s // CHECK: $_ZTVN5NSP_18CBaseOneE = comdat any // CHECK: $_ZTVN5NSP_28CBaseTwoE = comdat any // CHECK: $_ZTV8CDerived = comdat any -// CHECK: @_ZTVN5NSP_18CBaseOneE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_ONE_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTVN5NSP_28CBaseTwoE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_TWO_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTV8CDerived = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN5NSP_18CBaseOneE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_ONE_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN5NSP_28CBaseTwoE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_TWO_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTV8CDerived = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] // CHECK: [[BASE_ONE_VTABLE_VAR]] = 
!DIGlobalVariableExpression(var: [[BASE_ONE_VTABLE:![0-9]*]], expr: !DIExpression()) // CHECK-NEXT: [[BASE_ONE_VTABLE]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTVN5NSP_18CBaseOneE" diff --git a/clang/test/DebugInfo/CXX/vtable-inheritance-simple-main.cpp b/clang/test/DebugInfo/CXX/vtable-inheritance-simple-main.cpp index d64e711dddfa0..bcf8ff73cee69 100644 --- a/clang/test/DebugInfo/CXX/vtable-inheritance-simple-main.cpp +++ b/clang/test/DebugInfo/CXX/vtable-inheritance-simple-main.cpp @@ -1,5 +1,3 @@ -// REQUIRES: target={{x86_64.*-linux.*}} - // Simple inheritance case: // For CBase and CDerived we check: // - Generation of their vtables (including attributes). @@ -86,35 +84,35 @@ int main() { } #endif -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g %s -o %t.simple-base.bc -DBASE_CODE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g %s -o %t.simple-derived.bc -DDERIVED_CODE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g %s -o %t.simple-main.bc -DMAIN_CODE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o %t.simple-base.bc -DBASE_CODE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o %t.simple-derived.bc -DDERIVED_CODE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o %t.simple-main.bc -DMAIN_CODE // RUN: llvm-link %t.simple-base.bc %t.simple-derived.bc %t.simple-main.bc -S -o %t.simple-combined.ll // RUN: FileCheck --input-file=%t.simple-combined.ll -check-prefix=CHECK-ONE %s -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g -flto %s -o %t.simple-base.bc 
-DBASE_CODE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g -flto %s -o %t.simple-derived.bc -DDERIVED_CODE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g -flto %s -o %t.simple-main.bc -DMAIN_CODE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -flto -disable-llvm-passes %s -o %t.simple-base.bc -DBASE_CODE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -flto -disable-llvm-passes %s -o %t.simple-derived.bc -DDERIVED_CODE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -flto -disable-llvm-passes %s -o %t.simple-main.bc -DMAIN_CODE // RUN: llvm-link %t.simple-base.bc %t.simple-derived.bc %t.simple-main.bc -S -o %t.simple-combined.ll // RUN: FileCheck --input-file=%t.simple-combined.ll -check-prefix=CHECK-ONE %s -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g %s -o %t.simple-base.bc -DBASE_CODE -DSYMBOL_AT_FILE_SCOPE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g %s -o %t.simple-derived.bc -DDERIVED_CODE -DSYMBOL_AT_FILE_SCOPE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g %s -o %t.simple-main.bc -DMAIN_CODE -DSYMBOL_AT_FILE_SCOPE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o %t.simple-base.bc -DBASE_CODE -DSYMBOL_AT_FILE_SCOPE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o %t.simple-derived.bc -DDERIVED_CODE -DSYMBOL_AT_FILE_SCOPE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 
-disable-llvm-passes %s -o %t.simple-main.bc -DMAIN_CODE -DSYMBOL_AT_FILE_SCOPE // RUN: llvm-link %t.simple-base.bc %t.simple-derived.bc %t.simple-main.bc -S -o %t.simple-combined.ll // RUN: FileCheck --input-file=%t.simple-combined.ll -check-prefix=CHECK-TWO %s -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g -flto %s -o %t.simple-base.bc -DBASE_CODE -DSYMBOL_AT_FILE_SCOPE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g -flto %s -o %t.simple-derived.bc -DDERIVED_CODE -DSYMBOL_AT_FILE_SCOPE -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -c -g -flto %s -o %t.simple-main.bc -DMAIN_CODE -DSYMBOL_AT_FILE_SCOPE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -flto -disable-llvm-passes %s -o %t.simple-base.bc -DBASE_CODE -DSYMBOL_AT_FILE_SCOPE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -flto -disable-llvm-passes %s -o %t.simple-derived.bc -DDERIVED_CODE -DSYMBOL_AT_FILE_SCOPE +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm-bc -debug-info-kind=limited -dwarf-version=5 -O0 -flto -disable-llvm-passes %s -o %t.simple-main.bc -DMAIN_CODE -DSYMBOL_AT_FILE_SCOPE // RUN: llvm-link %t.simple-base.bc %t.simple-derived.bc %t.simple-main.bc -S -o %t.simple-combined.ll // RUN: FileCheck --input-file=%t.simple-combined.ll -check-prefix=CHECK-TWO %s // CHECK-ONE: ${{_ZN3NSP5CBaseC2Ev|_ZN8CDerivedC2Ev}} = comdat any // CHECK-ONE: ${{_ZN3NSP5CBaseC2Ev|_ZN8CDerivedC2Ev}} = comdat any -// CHECK-ONE: @_ZTV8CDerived = {{dso_local|hidden}} unnamed_addr constant {{.*}}, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] -// CHECK-ONE: @_ZTVN3NSP5CBaseE = {{dso_local|hidden}} unnamed_addr constant {{.*}}, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] +// CHECK-ONE: @_ZTV8CDerived = {{.*}}unnamed_addr constant 
{{.*}}, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] +// CHECK-ONE: @_ZTVN3NSP5CBaseE = {{.*}}unnamed_addr constant {{.*}}, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] // CHECK-ONE: [[DERIVED_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[DERIVED_VTABLE:![0-9]*]], expr: !DIExpression()) // CHECK-ONE-NEXT: [[DERIVED_VTABLE]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV8CDerived" @@ -133,8 +131,8 @@ int main() { // CHECK-TWO: ${{_ZN3NSP5CBaseC2Ev|_ZN8CDerivedC2Ev}} = comdat any // CHECK-TWO: ${{_ZN3NSP5CBaseC2Ev|_ZN8CDerivedC2Ev}} = comdat any -// CHECK-TWO: @_ZTVN3NSP5CBaseE = {{dso_local|hidden}} unnamed_addr constant {{.*}}, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] -// CHECK-TWO: @_ZTV8CDerived = {{dso_local|hidden}} unnamed_addr constant {{.*}}, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] +// CHECK-TWO: @_ZTVN3NSP5CBaseE = {{.*}}unnamed_addr constant {{.*}}, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] +// CHECK-TWO: @_ZTV8CDerived = {{.*}}unnamed_addr constant {{.*}}, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] // CHECK-TWO: [[BASE_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[BASE_VTABLE:![0-9]*]], expr: !DIExpression()) // CHECK-TWO-NEXT: [[BASE_VTABLE]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTVN3NSP5CBaseE" diff --git a/clang/test/DebugInfo/CXX/vtable-inheritance-simple.cpp b/clang/test/DebugInfo/CXX/vtable-inheritance-simple.cpp index b24ece1598327..8d8c778dbb04e 100644 --- a/clang/test/DebugInfo/CXX/vtable-inheritance-simple.cpp +++ b/clang/test/DebugInfo/CXX/vtable-inheritance-simple.cpp @@ -28,18 +28,19 @@ int main() { return 0; } -// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -mrelocation-model pic -pic-is-pie -debug-info-kind=limited -dwarf-version=5 -disable-O0-optnone -disable-llvm-passes %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm -mrelocation-model pic -pic-is-pie -debug-info-kind=limited -dwarf-version=5 -disable-O0-optnone -disable-llvm-passes %s -o - | 
FileCheck %s --check-prefix=COFF +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-mingw -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - | FileCheck %s --check-prefix=COFF // CHECK: $_ZTVN3NSP5CBaseE = comdat any // CHECK: $_ZTV8CDerived = comdat any -// CHECK: @_ZTVN3NSP5CBaseE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTV8CDerived = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] -// COFF: @_ZTVN3NSP5CBaseE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8 +// CHECK: @_ZTVN3NSP5CBaseE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTV8CDerived = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] +// COFF: @_ZTVN3NSP5CBaseE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8 // COFF-NOT: !dbg // COFF-SAME: {{$}} -// COFF: @_ZTV8CDerived = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8 +// COFF: @_ZTV8CDerived = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8 // COFF-NOT: !dbg // COFF-SAME: {{$}} diff --git a/clang/test/DebugInfo/CXX/vtable-inheritance-virtual.cpp b/clang/test/DebugInfo/CXX/vtable-inheritance-virtual.cpp index b01f156b7f654..c3015f0498419 100644 --- a/clang/test/DebugInfo/CXX/vtable-inheritance-virtual.cpp +++ b/clang/test/DebugInfo/CXX/vtable-inheritance-virtual.cpp @@ -1,5 +1,3 @@ -// REQUIRES: target={{x86_64.*-linux.*}} - // Virtual inheritance case: // For CBase, CLeft, CRight 
and CDerived we check: // - Generation of their vtables (including attributes). @@ -44,17 +42,18 @@ int main() { return 0; } -// RUN: %clang --target=x86_64-linux -Xclang -disable-O0-optnone -Xclang -disable-llvm-passes -emit-llvm -S -g %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - | FileCheck %s // CHECK: $_ZTVN3NSP5CBaseE = comdat any // CHECK: $_ZTVN5NSP_15CLeftE = comdat any // CHECK: $_ZTVN5NSP_26CRightE = comdat any // CHECK: $_ZTV8CDerived = comdat any -// CHECK: @_ZTVN3NSP5CBaseE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTVN5NSP_15CLeftE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[LEFT_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTVN5NSP_26CRightE = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[RIGHT_VTABLE_VAR:![0-9]*]] -// CHECK: @_ZTV8CDerived = linkonce_odr {{dso_local|hidden}} unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN3NSP5CBaseE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[BASE_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN5NSP_15CLeftE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[LEFT_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTVN5NSP_26CRightE = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[RIGHT_VTABLE_VAR:![0-9]*]] +// CHECK: @_ZTV8CDerived = linkonce_odr {{.*}}unnamed_addr constant {{.*}}, comdat, align 8, !dbg [[DERIVED_VTABLE_VAR:![0-9]*]] // CHECK: [[BASE_VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[BASE_VTABLE:![0-9]*]], expr: !DIExpression()) // CHECK-NEXT: [[BASE_VTABLE]] = distinct 
!DIGlobalVariable(name: "_vtable$", linkageName: "_ZTVN3NSP5CBaseE" diff --git a/clang/test/DebugInfo/CXX/vtable-template-instantiation.cpp b/clang/test/DebugInfo/CXX/vtable-template-instantiation.cpp new file mode 100644 index 0000000000000..60726d253a686 --- /dev/null +++ b/clang/test/DebugInfo/CXX/vtable-template-instantiation.cpp @@ -0,0 +1,84 @@ +// For the `CTemplate` templated class below, check the following cases: +// - Implicitly instantiated whole class by up-casting (`NOCAST` not defined) +// or implicitly instantiated member functions only (`NOCAST` defined): +// * The vtable is generated with a COMDAT specifier +// * Its '_vtable$' is generated +// - Define explicitly instantiation (`EXPLICIT` defined): +// * The vtable is generated with a COMDAT specifier +// * Its '_vtable$' is generated +// - Declare explicitly instantiation as `extern` (`EXTERN` defined): +// # when non-optimized: +// * The vtable is declared +// * Its '_vtable$' is NOT generated +// # when optimized even if no LLVM passes +// * The vtable is declared as `available_externally` (which is potentially turned into `external` by LLVM passes) +// * Its '_vtable$' is generated + +struct CBase { + virtual void f() noexcept {} +}; + +template +struct CTemplate: CBase { + void f() noexcept override; + virtual ~CTemplate() noexcept; +}; +template +void CTemplate::f() noexcept {} +template +CTemplate::~CTemplate() noexcept {} + +#ifdef EXPLICIT +template struct CTemplate; +#endif +#ifdef EXTERN +extern template struct CTemplate; +#endif + +CTemplate *get(CBase *) noexcept; + +int main() { + CTemplate Template; +#ifdef NOCAST + get(nullptr)->f(); +#else + get(&Template)->f(); +#endif + + return 0; +} + +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - | FileCheck %s -check-prefixes IMPLICIT +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - | 
FileCheck %s -check-prefixes IMPLICIT +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - -DNOCAST | FileCheck %s -check-prefixes IMPLICIT +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - -DNOCAST | FileCheck %s -check-prefixes IMPLICIT +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - -DEXPLICIT | FileCheck %s -check-prefixes EXPLICIT +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - -DEXPLICIT | FileCheck %s -check-prefixes EXPLICIT +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O0 -disable-llvm-passes %s -o - -DEXTERN | FileCheck %s -check-prefixes EXTERN,EXTERN-O0 +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -debug-info-kind=limited -dwarf-version=5 -O1 -disable-llvm-passes %s -o - -DEXTERN | FileCheck %s -check-prefixes EXTERN,EXTERN-O1 + +// IMPLICIT: $_ZTV9CTemplateIvE = comdat any +// IMPLICIT: @_ZTV9CTemplateIvE = linkonce_odr {{.*}}unnamed_addr constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, comdat, align 8, !dbg [[VTABLE_VAR:![0-9]*]] +// IMPLICIT-DAG: [[VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV9CTemplateIvE" +// IMPLICIT-DAG: !DIGlobalVariableExpression(var: [[VTABLE]], expr: !DIExpression()) +// IMPLICIT-DAG: [[TYPE:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CTemplate" +// IMPLICIT-DAG: !DIDerivedType(tag: DW_TAG_variable, name: "_vtable$", scope: [[TYPE]], file: {{.*}}, baseType: [[PVOID:![0-9]+]], flags: DIFlagPrivate | DIFlagArtificial | DIFlagStaticMember) +// IMPLICIT-DAG: [[PVOID]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) + +// EXPLICIT: $_ZTV9CTemplateIvE = comdat any +// EXPLICIT: 
@_ZTV9CTemplateIvE = weak_odr {{.*}}unnamed_addr constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, comdat, align 8, !dbg [[VTABLE_VAR:![0-9]*]] +// EXPLICIT-DAG: [[VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV9CTemplateIvE" +// EXPLICIT-DAG: [[VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[VTABLE]], expr: !DIExpression()) +// EXPLICIT-DAG: [[TYPE:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CTemplate" +// EXPLICIT-DAG: !DIDerivedType(tag: DW_TAG_variable, name: "_vtable$", scope: [[TYPE]], file: {{.*}}, baseType: [[PVOID:![0-9]+]], flags: DIFlagPrivate | DIFlagArtificial | DIFlagStaticMember) +// EXPLICIT-DAG: [[PVOID]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) + +// EXTERN-NOT: $_ZTV9CTemplateIvE +// EXTERN-O0: @_ZTV9CTemplateIvE = external {{.*}}unnamed_addr constant {{{ \[[^]]*\] }}}, align 8{{$}} +// EXTERN-O1: @_ZTV9CTemplateIvE = available_externally {{.*}}unnamed_addr constant {{{ \[[^]]*\] } { \[[^]]*\] \[[^]]*\] }}}, align 8, !dbg [[VTABLE_VAR:![0-9]*]] +// EXTERN-O0-NOT: linkageName: "_ZTV9CTemplateIvE" +// EXTERN-O1-DAG: [[VTABLE:![0-9]+]] = distinct !DIGlobalVariable(name: "_vtable$", linkageName: "_ZTV9CTemplateIvE" +// EXTERN-O1-DAG: [[VTABLE_VAR]] = !DIGlobalVariableExpression(var: [[VTABLE]], expr: !DIExpression()) +// EXTERN-O1-DAG: [[TYPE:![0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "CTemplate" +// EXTERN-O1-DAG: !DIDerivedType(tag: DW_TAG_variable, name: "_vtable$", scope: [[TYPE]], file: {{.*}}, baseType: [[PVOID:![0-9]+]], flags: DIFlagPrivate | DIFlagArtificial | DIFlagStaticMember) +// EXTERN-O1-DAG: [[PVOID]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) From 4abcbb053f8adaf48dbfff677e8ccda1f6d52b33 Mon Sep 17 00:00:00 2001 From: Tomohiro Kashiwada Date: Sun, 14 Sep 2025 06:40:12 +0900 Subject: [PATCH 236/734] [Clang][Cygwin] Use correct mangling rule (#158404) In 
https://github.com/llvm/llvm-project/commit/45ca613c135ea7b5fbc63bff003f20bf20f62081, whether to mangle names based on calling conventions according to Microsoft conventions was refactored to a bool in the TargetInfo. Cygwin targets also require this mangling, but were missed, presumably due to lack of test coverage of these targets. This commit enables the name mangling for Cygwin, and also enables test coverage of this mangling on Cygwin targets. --- clang/lib/Basic/Targets/X86.h | 2 ++ clang/test/CodeGen/mangle-windows.c | 6 ++++-- clang/test/CodeGenCXX/mangle-windows.cpp | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 6e013c95dbf01..d159a7906854c 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -646,6 +646,7 @@ class LLVM_LIBRARY_VISIBILITY CygwinX86_32TargetInfo : public X86_32TargetInfo { : X86_32TargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedShort; this->WIntType = TargetInfo::UnsignedInt; + this->UseMicrosoftManglingForC = true; DoubleAlign = LongLongAlign = 64; resetDataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-" "i128:128-f80:32-n8:16:32-a:0:32-S32", @@ -983,6 +984,7 @@ class LLVM_LIBRARY_VISIBILITY CygwinX86_64TargetInfo : public X86_64TargetInfo { : X86_64TargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedShort; this->WIntType = TargetInfo::UnsignedInt; + this->UseMicrosoftManglingForC = true; } void getTargetDefines(const LangOptions &Opts, diff --git a/clang/test/CodeGen/mangle-windows.c b/clang/test/CodeGen/mangle-windows.c index 046b1e8815a8a..e1b06e72a9635 100644 --- a/clang/test/CodeGen/mangle-windows.c +++ b/clang/test/CodeGen/mangle-windows.c @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-mingw32 | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-mingw32 | 
FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-cygwin | FileCheck %s // RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-windows-msvc-elf | FileCheck %s --check-prefix=ELF32 // RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64 -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-mingw32 | FileCheck %s --check-prefix=X64 +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-mingw32 | FileCheck %s --check-prefix=X64 +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-cygwin | FileCheck %s --check-prefix=X64 // RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-windows-msvc-elf | FileCheck %s --check-prefix=ELF64 // CHECK: target datalayout = "e-m:x-{{.*}}" diff --git a/clang/test/CodeGenCXX/mangle-windows.cpp b/clang/test/CodeGenCXX/mangle-windows.cpp index 3d5a1e9a868ef..737abcf6e3498 100644 --- a/clang/test/CodeGenCXX/mangle-windows.cpp +++ b/clang/test/CodeGenCXX/mangle-windows.cpp @@ -4,6 +4,9 @@ // RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-mingw32 | \ // RUN: FileCheck --check-prefix=ITANIUM %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-cygwin | \ +// RUN: FileCheck --check-prefix=ITANIUM %s + void __stdcall f1(void) {} // WIN: define dso_local x86_stdcallcc void @"?f1@@YGXXZ" // ITANIUM: define dso_local x86_stdcallcc void @"\01__Z2f1v@0" From 9490d58fa92bb338db96af331194c9ba26eb0201 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sat, 13 Sep 2025 22:32:48 +0000 Subject: [PATCH 237/734] Revert "[VPlan] Compute cost of scalar (U|S)Div, (U|S)Rem in computeCost (NFCI)." This reverts commit de7e3a589525179f3b02b84b194aac6cf581425c. This broke quite a few upstream buildbots and premerge. Reverting for now to get things back to green. 
https://lab.llvm.org/buildbot/#/builders/137/builds/25467 --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f5c8cf106d8b5..c6273074778d1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3151,23 +3151,9 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: - case Instruction::Select: return *getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx) * (isSingleScalar() ? 1 : VF.getFixedValue()); - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::SRem: - case Instruction::URem: { - InstructionCost ScalarCost = *getCostForRecipeWithOpcode( - getOpcode(), ElementCount::getFixed(1), Ctx); - if (isSingleScalar()) - return ScalarCost; - - return ScalarCost * VF.getFixedValue() + - Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this), - to_vector(operands()), VF); - } case Instruction::Load: case Instruction::Store: { if (isSingleScalar()) { From a5641e40d7e6424ae18c04038eed6b94c59eb7c4 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 13 Sep 2025 18:03:26 -0700 Subject: [PATCH 238/734] [ADT] Add DenseMap::deallocateBuckets (NFC) (#158443) This patch adds a small helper function DenseMap::deallocateBuckets just like SmallDenseMap::deallocateBuckets. With the new helper function: ~DenseMap() DenseMap &operator=(DenseMap &&other) will look identical to their respective SmallDenseMap counterparts, facilitating further refactoring. 
--- llvm/include/llvm/ADT/DenseMap.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 18dd7f30c5616..f076049c55a26 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -738,7 +738,7 @@ class DenseMap : public DenseMapBase, ~DenseMap() { this->destroyAll(); - deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); + deallocateBuckets(); } void swap(DenseMap &RHS) { @@ -758,7 +758,7 @@ class DenseMap : public DenseMapBase, DenseMap &operator=(DenseMap &&other) { this->destroyAll(); - deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); + deallocateBuckets(); init(0); swap(other); return *this; @@ -766,7 +766,7 @@ class DenseMap : public DenseMapBase, void copyFrom(const DenseMap &other) { this->destroyAll(); - deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); + deallocateBuckets(); if (allocateBuckets(other.NumBuckets)) { this->BaseT::copyFrom(other); } else { @@ -827,6 +827,10 @@ class DenseMap : public DenseMapBase, unsigned getNumBuckets() const { return NumBuckets; } + void deallocateBuckets() { + deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); + } + bool allocateBuckets(unsigned Num) { NumBuckets = Num; if (NumBuckets == 0) { From 0bbf2ea08a2adb0880c7958f9c609af7c479da46 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Sat, 13 Sep 2025 19:14:21 -0700 Subject: [PATCH 239/734] [profcheck] Exclude LoopVectorize test introduced in ef7e03a (#158452) --- llvm/utils/profcheck-xfail.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index 9d170b392b6c7..482848842aa05 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -1302,6 +1302,7 @@ Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll 
Transforms/LoopVectorize/first-order-recurrence-complex.ll Transforms/LoopVectorize/first-order-recurrence.ll Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll Transforms/LoopVectorize/float-induction.ll Transforms/LoopVectorize/float-minmax-instruction-flag.ll Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll From ad9d551e5568e155005e569a0c9527b6e45c92b8 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 14 Sep 2025 05:56:51 +0300 Subject: [PATCH 240/734] [Mips] Remove `size` operand of LwRxPcTcp16 / LwRxPcTcpX16 (#157348) There is no such operand on LW instructions. It is neither encoded nor printed, and was only inserted by MipsConstantIslands pass but never used after that. --- llvm/lib/Target/Mips/Mips16InstrInfo.td | 5 +++-- llvm/lib/Target/Mips/MipsConstantIslandPass.cpp | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.td b/llvm/lib/Target/Mips/Mips16InstrInfo.td index ab473c133b8e3..296414c6a06db 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.td @@ -88,7 +88,7 @@ class FRI16_ins op, string asmstr, class FRI16_TCP_ins _op, string asmstr, InstrItinClass itin>: - FRI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm8, i32imm:$size), + FRI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm8), !strconcat(asmstr, "\t$rx, $imm8\t# 16 bit inst"), [], itin>; class FRI16R_ins_base op, string asmstr, string asmstr2, @@ -216,7 +216,7 @@ class FEXT_RI16_B_ins _op, string asmstr, class FEXT_RI16_TCP_ins _op, string asmstr, InstrItinClass itin>: - FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm16, i32imm:$size), + FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm16), !strconcat(asmstr, "\t$rx, $imm16"), [], itin>; class FEXT_2RI16_ins _op, string asmstr, @@ -856,6 +856,7 @@ def LwRxSpImmX16: FEXT_RRI16_mem_ins<0b10010, "lw", mem16sp, II_LW>, 
MayLoad; def LwRxPcTcp16: FRI16_TCP_ins<0b10110, "lw", II_LW>, MayLoad; def LwRxPcTcpX16: FEXT_RI16_TCP_ins<0b10110, "lw", II_LW>, MayLoad; + // // Format: MOVE r32, rz MIPS16e // Purpose: Move diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index 8699807b6bf2b..31a229a0fd102 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -1647,7 +1647,6 @@ void MipsConstantIslands::prescanForConstants() { MI.removeOperand(1); MI.removeOperand(1); MI.addOperand(MachineOperand::CreateCPI(index, 0)); - MI.addOperand(MachineOperand::CreateImm(4)); } break; } From 87cca0c32c4e3ce51b53cf5c3ee0a430c1a6cb8f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 14 Sep 2025 00:04:11 -0700 Subject: [PATCH 241/734] [lit] Split Linux specific ulimit tests (#158390) Some of the ulimit limits do not work on some POSIX platforms. The motivating example here is ulimit -v on MacOS as the relevant system calls are not implemented in XNU. Splitting the tests lets us keep test coverage on POSIX (non-Linux) platforms. 
--- .../tests/Inputs/shtest-ulimit-nondarwin/lit.cfg | 8 ++++++++ .../Inputs/shtest-ulimit-nondarwin/ulimit_okay.txt | 4 ++++ .../lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt | 1 - llvm/utils/lit/tests/shtest-ulimit-nondarwin.py | 13 +++++++++++++ llvm/utils/lit/tests/shtest-ulimit.py | 8 -------- 5 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/lit.cfg create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_okay.txt create mode 100644 llvm/utils/lit/tests/shtest-ulimit-nondarwin.py diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/lit.cfg new file mode 100644 index 0000000000000..c7bdc7e7b6bc0 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/lit.cfg @@ -0,0 +1,8 @@ +import lit.formats + +config.name = "shtest-ulimit" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest(execute_external=False) +config.test_source_root = None +config.test_exec_root = None +config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_okay.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_okay.txt new file mode 100644 index 0000000000000..dbdd0037e70a7 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_okay.txt @@ -0,0 +1,4 @@ +# RUN: ulimit -v 1048576 +# RUN: %{python} %S/../shtest-ulimit/print_limits.py +# Fail the test so that we can assert on the output. 
+# RUN: not echo return diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt index ad353b5d7c459..4edf1c303a092 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt @@ -1,4 +1,3 @@ -# RUN: ulimit -v 1048576 # RUN: ulimit -n 50 # RUN: %{python} %S/print_limits.py # Fail the test so that we can assert on the output. diff --git a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py new file mode 100644 index 0000000000000..2661a2c8d6448 --- /dev/null +++ b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py @@ -0,0 +1,13 @@ +# Check the ulimit command + +# ulimit does not work on non-POSIX platforms. +# These tests are specific to options that Darwin does not support. +# UNSUPPORTED: system-windows, system-darwin + +# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit-nondarwin | FileCheck %s + +# CHECK: -- Testing: 1 tests{{.*}} + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) +# CHECK: ulimit -v 1048576 +# CHECK: RLIMIT_AS=1073741824 diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index b86578a21f661..15336d51ca3df 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -3,12 +3,6 @@ # ulimit does not work on non-POSIX platforms. # UNSUPPORTED: system-windows -# TODO(boomanaiden154): The test fails on some non-Linux POSIX -# platforms (like MacOS) due to the underlying system not supporting -# ulimit -v. This test needs to be carved up so we keep full test -# coverage on Linux and as much as possible on other platforms. 
-# REQUIRES: system-linux - # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s # CHECK: -- Testing: 2 tests{{.*}} @@ -18,7 +12,5 @@ # CHECK: 'ulimit' requires two arguments # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) -# CHECK: ulimit -v 1048576 # CHECK: ulimit -n 50 -# CHECK: RLIMIT_AS=1073741824 # CHECK: RLIMIT_NOFILE=50 From b6014b602bc5bba643c292bca0524876f74178dd Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 14 Sep 2025 07:45:17 +0000 Subject: [PATCH 242/734] [lit] Mark shtest-ulimit.py unsupported on Solaris There were some build failures because apparently running ulimit -n 50 actually means ulimit -n 48 there. Losing the test coverage on Solaris shouldn't be a big deal. --- llvm/utils/lit/tests/shtest-ulimit.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index 15336d51ca3df..e84327772d3a1 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -1,7 +1,9 @@ # Check the ulimit command # ulimit does not work on non-POSIX platforms. -# UNSUPPORTED: system-windows +# Solaris for some reason does not respect ulimit -n, so mark it unsupported +# as well. +# UNSUPPORTED: system-windows, system-solaris # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s From 9c91d4a31d758cbddc0fc6347470604c2040ab2f Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Sun, 14 Sep 2025 11:45:55 +0300 Subject: [PATCH 243/734] [clang-tools-extra][docs] Add "Potentially Breaking Changes" section to ReleaseNotes (#158434) This implements: https://github.com/llvm/llvm-project/blob/30e9cbacab5b474de89992851f126fff300c1ab7/llvm/docs/DeveloperPolicy.rst?plain=1#L282-L286 From LLVM developer policy. 
--- clang-tools-extra/docs/ReleaseNotes.rst | 15 +++++++++++++++ clang-tools-extra/docs/ReleaseNotesTemplate.txt | 3 +++ 2 files changed, 18 insertions(+) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 34091906cbff2..7cdff86beeec6 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -46,6 +46,21 @@ infrastructure are described first, followed by tool-specific sections. Major New Features ------------------ +Potentially Breaking Changes +---------------------------- + +- Removed :program:`clang-tidy`'s global options `IgnoreMacros` and + `StrictMode`, which were documented as deprecated since + :program:`clang-tidy-20`. Users should use the check-specific options of the + same name instead. + +- Renamed :program:`clang-tidy`'s option name of check + :doc:`bugprone-easily-swappable-parameters + ` from + ``NamePrefixSuffixSilenceDissimilarityTreshold`` to + ``NamePrefixSuffixSilenceDissimilarityThreshold``, + correcting a spelling mistake. + Improvements to clangd ---------------------- diff --git a/clang-tools-extra/docs/ReleaseNotesTemplate.txt b/clang-tools-extra/docs/ReleaseNotesTemplate.txt index b17799b3b557d..69c3bcf67b8db 100644 --- a/clang-tools-extra/docs/ReleaseNotesTemplate.txt +++ b/clang-tools-extra/docs/ReleaseNotesTemplate.txt @@ -46,6 +46,9 @@ infrastructure are described first, followed by tool-specific sections. Major New Features ------------------ +Potentially Breaking Changes +---------------------------- + Improvements to clangd ---------------------- From 43ba999d1205f787e76f4d4a01f45380d373efe9 Mon Sep 17 00:00:00 2001 From: Dor Arad <45083160+undor@users.noreply.github.com> Date: Sun, 14 Sep 2025 12:07:38 +0300 Subject: [PATCH 244/734] [mlir][scf] ExecuteRegionOp folders to consider no_inline attribute (#158083) Fix missing handling of no_inline existence in ExecuteRegionOp folders. 
Co-authored-by: Dor Arad --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 ++ mlir/test/Dialect/SCF/canonicalize.mlir | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 45b14fcf8aadd..c35989ecba6cd 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -237,6 +237,8 @@ struct MultiBlockExecuteInliner : public OpRewritePattern { LogicalResult matchAndRewrite(ExecuteRegionOp op, PatternRewriter &rewriter) const override { + if (op.getNoInline()) + return failure(); if (!isa(op->getParentOp())) return failure(); diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 2752c492cb2be..4ad2da8388eb7 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1483,6 +1483,24 @@ func.func @execute_region_no_inline() { // ----- +// CHECK-LABEL: func @execute_region_under_func_no_inline +func.func @execute_region_under_func_no_inline() { + "test.foo"() : () -> () + %v = scf.execute_region -> i64 no_inline { + %x = "test.val"() : () -> i64 + scf.yield %x : i64 + } + "test.bar"(%v) : (i64) -> () + return +} + +// CHECK-NEXT: "test.foo"() : () -> () +// CHECK-NEXT: scf.execute_region +// CHECK-NEXT: %[[VAL:.*]] = "test.val"() : () -> i64 +// CHECK-NEXT: scf.yield %[[VAL]] : i64 + +// ----- + // CHECK-LABEL: func @func_execute_region_inline func.func @func_execute_region_inline() { "test.foo"() : () -> () From 1b1b83ffacc56e0c6344be25a7238f509fd705e6 Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Sun, 14 Sep 2025 11:29:00 +0200 Subject: [PATCH 245/734] [CIR][NFC] Remove Covered MissingFeatures flags for Complex (#158425) Remove Covered MissingFeatures flags for Complex --- clang/include/clang/CIR/MissingFeatures.h | 2 -- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 3 --- clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp | 2 -- 3 files changed, 7 deletions(-) diff --git 
a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 52d5f8a2ded2c..60e0aa163dc04 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -296,8 +296,6 @@ struct MissingFeatures { // Future CIR operations static bool awaitOp() { return false; } static bool callOp() { return false; } - static bool complexImagOp() { return false; } - static bool complexRealOp() { return false; } static bool ifOp() { return false; } static bool invokeOp() { return false; } static bool labelOp() { return false; } diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 24aef693024f7..8918eb4cbb1ad 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1754,9 +1754,6 @@ LogicalResult cir::BinOp::verify() { return emitError() << "The nsw/nuw flags and the saturated flag are " "mutually exclusive"; - assert(!cir::MissingFeatures::complexType()); - // TODO(cir): verify for complex binops - return mlir::success(); } diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp index d41ea0af58938..fbecab9774f5b 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp @@ -134,8 +134,6 @@ void CIRCanonicalizePass::runOnOperation() { getOperation()->walk([&](Operation *op) { assert(!cir::MissingFeatures::switchOp()); assert(!cir::MissingFeatures::tryOp()); - assert(!cir::MissingFeatures::complexRealOp()); - assert(!cir::MissingFeatures::complexImagOp()); assert(!cir::MissingFeatures::callOp()); // Many operations are here to perform a manual `fold` in From 91d4c0dfdf226665c17d4a44e2fab466e6103d18 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 14 Sep 2025 13:15:03 +0100 Subject: [PATCH 246/734] Reapply "[VPlan] Compute cost of scalar (U|S)Div, (U|S)Rem in computeCost (NFCI)." 
This reverts commit 9490d58fa92bb338db96af331194c9ba26eb0201. Recommits de7e3a58952 with a fix for an unhandled case, causing crashes in some configs. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index c6273074778d1..b72088bf1431e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3154,6 +3154,19 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, return *getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx) * (isSingleScalar() ? 1 : VF.getFixedValue()); + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: { + InstructionCost ScalarCost = *getCostForRecipeWithOpcode( + getOpcode(), ElementCount::getFixed(1), Ctx); + if (isSingleScalar()) + return ScalarCost; + + return ScalarCost * VF.getFixedValue() + + Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this), + to_vector(operands()), VF); + } case Instruction::Load: case Instruction::Store: { if (isSingleScalar()) { From 94213a4aefc8dda671493aed993f0c6665c5f146 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Sun, 14 Sep 2025 14:19:19 +0200 Subject: [PATCH 247/734] [LifetimeSafety] Add support for GSL Pointer types (#154009) This extends the lifetime safety analysis to support C++ types annotated with `gsl::Pointer`, which represent non-owning "view" types like `std::string_view`. These types have the same lifetime safety concerns as raw pointers and references. 
- Added support for detecting and analyzing `gsl::Pointer` annotated types in lifetime safety analysis - Implemented handling for various expressions involving `gsl::Pointer` types: - Constructor expressions - Member call expressions (especially conversion operators) - Functional cast expressions - Initialization list expressions - Materialized temporary expressions - Updated the pointer type detection to recognize `gsl::Pointer` types - Added handling for function calls that create borrows through reference parameters Fixes: https://github.com/llvm/llvm-project/issues/152513 --- clang/lib/Analysis/LifetimeSafety.cpp | 94 ++++++++- clang/test/Sema/warn-lifetime-safety.cpp | 120 +++++++++++- .../unittests/Analysis/LifetimeSafetyTest.cpp | 182 +++++++++++++++++- 3 files changed, 381 insertions(+), 15 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index e687e5419c50a..0dd5716d93fb6 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -478,6 +478,25 @@ class FactGenerator : public ConstStmtVisitor { } } + void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } + } + + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}); + } + // FIXME: A more general VisitCallExpr could also be used here. + } + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized /// pointers can use the same type of loan. 
@@ -530,8 +549,27 @@ class FactGenerator : public ConstStmtVisitor { void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { // Check if this is a test point marker. If so, we are done with this // expression. - if (VisitTestPoint(FCE)) + if (handleTestPoint(FCE)) return; + if (isGslPointerType(FCE->getType())) + addAssignOriginFact(*FCE, *FCE->getSubExpr()); + } + + void VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + addAssignOriginFact(*ILE, *ILE->getInit(0)); + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + addAssignOriginFact(*MTE, *MTE->getSubExpr()); } void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { @@ -557,10 +595,21 @@ class FactGenerator : public ConstStmtVisitor { } private: - static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType(); + static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr(); + return RD->hasAttr(); + } + return false; } + static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); + } // Check if a type has an origin. 
static bool hasOrigin(const Expr *E) { return E->isGLValue() || isPointerType(E->getType()); @@ -570,6 +619,41 @@ class FactGenerator : public ConstStmtVisitor { return isPointerType(VD->getType()); } + void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + addAssignOriginFact(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}); + } + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. + void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef Args) { + if (!FD) + return; + // TODO: Handle more than one arguments. + for (unsigned I = 0; I <= 0 /*Args.size()*/; ++I) { + const Expr *ArgExpr = Args[I]; + + // Propagate origins for CXX this. + if (FD->isCXXClassMember() && I == 0) { + addAssignOriginFact(*Call, *ArgExpr); + continue; + } + // The parameter is a pointer, reference, or gsl::Pointer. + // This is a borrow. We propagate the origin from the argument expression + // at the call site to the parameter declaration in the callee. + if (hasOrigin(ArgExpr)) + addAssignOriginFact(*Call, *ArgExpr); + } + } + /// Creates a loan for the storage path of a given declaration reference. /// This function should be called whenever a DeclRefExpr represents a borrow. /// \param DRE The declaration reference expression that initiates the borrow. @@ -593,7 +677,7 @@ class FactGenerator : public ConstStmtVisitor { /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. /// If so, creates a `TestPointFact` and returns true. 
- bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) { + bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { if (!FCE->getType()->isVoidType()) return false; @@ -641,6 +725,8 @@ class FactGenerator : public ConstStmtVisitor { } void markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; assert(UseFacts.contains(DRE)); UseFacts[DRE]->markAsWritten(); } diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 660b9c9d5e243..bc8a5f3f7150f 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -6,6 +6,12 @@ struct MyObj { MyObj operator+(MyObj); }; +struct [[gsl::Pointer()]] View { + View(const MyObj&); // Borrows from MyObj + View(); + void use() const; +}; + //===----------------------------------------------------------------------===// // Basic Definite Use-After-Free (-W...permissive) // These are cases where the pointer is guaranteed to be dangling at the use site. @@ -20,12 +26,31 @@ void definite_simple_case() { (void)*p; // expected-note {{later used here}} } +void definite_simple_case_gsl() { + View v; + { + MyObj s; + v = s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + void no_use_no_error() { MyObj* p; { MyObj s; p = &s; } + // 'p' is dangling here, but since it is never used, no warning is issued. +} + +void no_use_no_error_gsl() { + View v; + { + MyObj s; + v = s; + } + // 'v' is dangling here, but since it is never used, no warning is issued. 
} void definite_pointer_chain() { @@ -39,6 +64,16 @@ void definite_pointer_chain() { (void)*q; // expected-note {{later used here}} } +void definite_propagation_gsl() { + View v1, v2; + { + MyObj s; + v1 = s; // expected-warning {{object whose reference is captured does not live long enough}} + v2 = v1; + } // expected-note {{destroyed here}} + v2.use(); // expected-note {{later used here}} +} + void definite_multiple_uses_one_warning() { MyObj* p; { @@ -78,6 +113,19 @@ void definite_single_pointer_multiple_loans(bool cond) { (void)*p; // expected-note 2 {{later used here}} } +void definite_single_pointer_multiple_loans_gsl(bool cond) { + View v; + if (cond){ + MyObj s; + v = s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + else { + MyObj t; + v = t; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note 2 {{later used here}} +} + //===----------------------------------------------------------------------===// // Potential (Maybe) Use-After-Free (-W...strict) @@ -94,18 +142,14 @@ void potential_if_branch(bool cond) { (void)*p; // expected-note {{later used here}} } -// If all paths lead to a dangle, it becomes a definite error. 
-void potential_becomes_definite(bool cond) { - MyObj* p; +void potential_if_branch_gsl(bool cond) { + MyObj safe; + View v = safe; if (cond) { - MyObj temp1; - p = &temp1; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} - else { - MyObj temp2; - p = &temp2; // expected-warning {{does not live long enough}} + MyObj temp; + v = temp; // expected-warning {{object whose reference is captured may not live long enough}} } // expected-note {{destroyed here}} - (void)*p; // expected-note 2 {{later used here}} + v.use(); // expected-note {{later used here}} } void definite_potential_together(bool cond) { @@ -159,6 +203,16 @@ void potential_for_loop_use_after_loop_body(MyObj safe) { (void)*p; // expected-note {{later used here}} } +void potential_for_loop_gsl() { + MyObj safe; + View v = safe; + for (int i = 0; i < 1; ++i) { + MyObj s; + v = s; // expected-warning {{object whose reference is captured may not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + void potential_for_loop_use_before_loop_body(MyObj safe) { MyObj* p = &safe; for (int i = 0; i < 1; ++i) { @@ -182,6 +236,19 @@ void potential_loop_with_break(bool cond) { (void)*p; // expected-note {{later used here}} } +void potential_loop_with_break_gsl(bool cond) { + MyObj safe; + View v = safe; + for (int i = 0; i < 10; ++i) { + if (cond) { + MyObj temp; + v = temp; // expected-warning {{object whose reference is captured may not live long enough}} + break; // expected-note {{destroyed here}} + } + } + v.use(); // expected-note {{later used here}} +} + void potential_multiple_expiry_of_same_loan(bool cond) { // Choose the last expiry location for the loan. 
MyObj safe; @@ -258,6 +325,28 @@ void definite_switch(int mode) { (void)*p; // expected-note 3 {{later used here}} } +void definite_switch_gsl(int mode) { + View v; + switch (mode) { + case 1: { + MyObj temp1; + v = temp1; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + case 2: { + MyObj temp2; + v = temp2; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + default: { + MyObj temp3; + v = temp3; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + v.use(); // expected-note 3 {{later used here}} +} + //===----------------------------------------------------------------------===// // No-Error Cases //===----------------------------------------------------------------------===// @@ -271,3 +360,14 @@ void no_error_if_dangle_then_rescue() { p = &safe; // p is "rescued" before use. (void)*p; // This is safe. } + +void no_error_if_dangle_then_rescue_gsl() { + MyObj safe; + View v; + { + MyObj temp; + v = temp; // 'v' is temporarily dangling. + } + v = safe; // 'v' is "rescued" before use by reassigning to a valid object. + v.use(); // This is safe. 
+} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 13e5832d70050..bff5378c0a8a9 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -11,7 +11,6 @@ #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -31,7 +30,13 @@ class LifetimeTestRunner { LifetimeTestRunner(llvm::StringRef Code) { std::string FullCode = R"( #define POINT(name) void("__lifetime_test_point_" #name) + struct MyObj { ~MyObj() {} int i; }; + + struct [[gsl::Pointer()]] View { + View(const MyObj&); + View(); + }; )"; FullCode += Code.str(); @@ -741,5 +746,180 @@ TEST_F(LifetimeAnalysisTest, NoDuplicateLoansForImplicitCastToConst) { EXPECT_THAT(Helper->getLoansForVar("a"), SizeIs(2)); } +TEST_F(LifetimeAnalysisTest, GslPointerSimpleLoan) { + SetupTest(R"( + void target() { + MyObj a; + View x = a; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConstructFromOwner) { + SetupTest(R"( + void target() { + MyObj al, bl, cl, dl, el, fl; + View a = View(al); + View b = View{bl}; + View c = View(View(View(cl))); + View d = View{View(View(dl))}; + View e = View{View{View{el}}}; + View f = {fl}; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("a"), HasLoansTo({"al"}, "p1")); + EXPECT_THAT(Origin("b"), HasLoansTo({"bl"}, "p1")); + EXPECT_THAT(Origin("c"), HasLoansTo({"cl"}, "p1")); + EXPECT_THAT(Origin("d"), HasLoansTo({"dl"}, "p1")); + EXPECT_THAT(Origin("e"), HasLoansTo({"el"}, "p1")); + EXPECT_THAT(Origin("f"), HasLoansTo({"fl"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConstructFromView) { + SetupTest(R"( + void target() { + MyObj a; + View x = View(a); + View y = View{x}; + View z = View(View(View(y))); + View p = View{View(View(x))}; + 
View q = {x}; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("q"), HasLoansTo({"a"}, "p1")); +} + +// FIXME: Handle loans in ternary operator! +TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { + SetupTest(R"( + void target(bool cond) { + MyObj a, b; + View v = cond ? a : b; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({}, "p1")); +} + +// FIXME: Handle temporaries. +TEST_F(LifetimeAnalysisTest, ViewFromTemporary) { + SetupTest(R"( + MyObj temporary(); + void target() { + View v = temporary(); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerWithConstAndAuto) { + SetupTest(R"( + void target() { + MyObj a; + const View v1 = a; + auto v2 = v1; + const auto& v3 = v2; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v3"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerPropagation) { + SetupTest(R"( + void target() { + MyObj a; + View x = a; + POINT(p1); + + View y = x; // Propagation via copy-construction + POINT(p2); + + View z; + z = x; // Propagation via copy-assignment + POINT(p3); + } + )"); + + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p2")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p3")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerLoanExpiration) { + SetupTest(R"( + void target() { + View x; + { + MyObj a; + x = a; + POINT(before_expiry); + } // `a` is destroyed here. 
+ POINT(after_expiry); + } + )"); + + EXPECT_THAT(NoLoans(), AreExpiredAt("before_expiry")); + EXPECT_THAT(LoansTo({"a"}), AreExpiredAt("after_expiry")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerReassignment) { + SetupTest(R"( + void target() { + MyObj safe; + View v; + v = safe; + POINT(p1); + { + MyObj unsafe; + v = unsafe; + POINT(p2); + } // `unsafe` expires here. + POINT(p3); + } + )"); + + EXPECT_THAT(Origin("v"), HasLoansTo({"safe"}, "p1")); + EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p2")); + EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p3")); + EXPECT_THAT(LoansTo({"unsafe"}), AreExpiredAt("p3")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConversionOperator) { + SetupTest(R"( + struct String; + + struct [[gsl::Pointer()]] StringView { + StringView() = default; + }; + + struct String { + ~String() {} + operator StringView() const; + }; + + void target() { + String xl, yl; + StringView x = xl; + StringView y; + y = yl; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"xl"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"yl"}, "p1")); +} + } // anonymous namespace } // namespace clang::lifetimes::internal From e07b5968d4ac1515582ad578aaa5497782972666 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Sun, 14 Sep 2025 08:29:37 -0400 Subject: [PATCH 248/734] [InstCombine] Fold select pattern with sub and negation to abs intrinsic (#156246) ```llvm %sub = sub nsw T %x, %y %cmp = icmp sgt T %x, %y ; or sge %neg = sub T 0, %sub %abs = select i1 %cmp, T %sub, T %neg ``` becomes: ```llvm %sub = sub nsw T %x, %y %abs = call T @llvm.abs.T(T %sub, i1 false) ``` Alive2: https://alive2.llvm.org/ce/z/ApdJX8 https://alive2.llvm.org/ce/z/gRTmZk --- .../InstCombine/InstCombineSelect.cpp | 32 ++++ .../Transforms/InstCombine/abs-intrinsic.ll | 137 ++++++++++++++++++ 2 files changed, 169 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 
9467463d39c0e..8f9d0bf6240d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1153,6 +1153,38 @@ static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal, return Builder.CreateBinaryIntrinsic(Intrinsic::abs, TI, Builder.getTrue()); } + // Match: (A > B) ? (A - B) : (0 - (A - B)) --> abs(A - B) + if (Pred == CmpInst::ICMP_SGT && + match(TI, m_NSWSub(m_Specific(A), m_Specific(B))) && + match(FI, m_Neg(m_Specific(TI)))) { + return Builder.CreateBinaryIntrinsic(Intrinsic::abs, TI, + Builder.getFalse()); + } + + // Match: (A < B) ? (0 - (A - B)) : (A - B) --> abs(A - B) + if (Pred == CmpInst::ICMP_SLT && + match(FI, m_NSWSub(m_Specific(A), m_Specific(B))) && + match(TI, m_Neg(m_Specific(FI)))) { + return Builder.CreateBinaryIntrinsic(Intrinsic::abs, FI, + Builder.getFalse()); + } + + // Match: (A > B) ? (0 - (B - A)) : (B - A) --> abs(B - A) + if (Pred == CmpInst::ICMP_SGT && + match(FI, m_NSWSub(m_Specific(B), m_Specific(A))) && + match(TI, m_Neg(m_Specific(FI)))) { + return Builder.CreateBinaryIntrinsic(Intrinsic::abs, FI, + Builder.getFalse()); + } + + // Match: (A < B) ? 
(B - A) : (0 - (B - A)) --> abs(B - A) + if (Pred == CmpInst::ICMP_SLT && + match(TI, m_NSWSub(m_Specific(B), m_Specific(A))) && + match(FI, m_Neg(m_Specific(TI)))) { + return Builder.CreateBinaryIntrinsic(Intrinsic::abs, TI, + Builder.getFalse()); + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index 346111d892975..763d82652dd5d 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -859,4 +859,141 @@ define i32 @abs_range_metadata(i32 %x) { %b = and i32 %a, 15 ret i32 %b } + !1 = !{i32 0, i32 16} + +define i32 @abs_diff(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false) +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub nsw i32 %x, %y + %cmp = icmp sgt i32 %x, %y + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i32 @abs_diff_neg_no_nsw_neg(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_neg_no_nsw_neg( +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: [[SUB1:%.*]] = sub i32 0, [[SUB]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[SUB1]] +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub i32 %x, %y + %cmp = icmp sgt i32 %x, %y + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i32 @abs_diff_neg(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_neg( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: [[SUB1:%.*]] = sub i32 0, [[SUB]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[SUB1]] +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub nsw i32 %y, %x + %cmp = icmp sgt i32 %x, %y + %sub1 = sub i32 0, 
%sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i32 @abs_diff_neg_no_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_neg_no_nsw( +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: [[SUB1:%.*]] = sub i32 0, [[SUB]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[SUB1]] +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub i32 %y, %x + %cmp = icmp sgt i32 %x, %y + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i32 @abs_diff_ge(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_ge( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false) +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub nsw i32 %x, %y + %cmp = icmp sge i32 %x, %y + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i32 @abs_diff_slt_commute(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_slt_commute( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false) +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub nsw i32 %x, %y + %cmp = icmp slt i32 %y, %x + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i32 @abs_diff_sge_same(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_sge_same( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false) +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub nsw i32 %x, %y + %cmp = icmp sge i32 %x, %y + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i32 @abs_diff_sle_inverted(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_sle_inverted( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = call i32 
@llvm.abs.i32(i32 [[SUB]], i1 false) +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub nsw i32 %x, %y + %cmp = icmp sle i32 %x, %y + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub1, i32 %sub + ret i32 %cond +} + +define i32 @abs_diff_sle_commute(i32 %x, i32 %y) { +; CHECK-LABEL: @abs_diff_sle_commute( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false) +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub nsw i32 %x, %y + %cmp = icmp sle i32 %y, %x + %sub1 = sub i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} + +define i8 @abs_diff_sle_y_x(i8 %x, i8 %y) { +; CHECK-LABEL: @abs_diff_sle_y_x( +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = call i8 @llvm.abs.i8(i8 [[SUB]], i1 false) +; CHECK-NEXT: ret i8 [[COND]] +; + %sub = sub nsw i8 %x, %y + %cmp = icmp sle i8 %y, %x + %sub1 = sub i8 0, %sub + %cond = select i1 %cmp, i8 %sub, i8 %sub1 + ret i8 %cond +} From 9ee1f159dccbee1e19ab7584e678af9be1054e2d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 21 Aug 2025 05:43:04 -0700 Subject: [PATCH 249/734] [MLIR] Apply clang-tidy fixes for llvm-qualified-auto in MPIToLLVM.cpp (NFC) --- mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp b/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp index aa47e398eb684..16ef11a8b14de 100644 --- a/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp +++ b/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp @@ -272,7 +272,7 @@ class OMPIImplTraits : public MPIImplTraits { Value getCommWorld(const Location loc, ConversionPatternRewriter &rewriter) override { - auto context = rewriter.getContext(); + auto *context = rewriter.getContext(); // get external opaque struct pointer type auto commStructT = LLVM::LLVMStructType::getOpaque("ompi_communicator_t", context); @@ -324,7 +324,7 @@ class 
OMPIImplTraits : public MPIImplTraits { else assert(false && "unsupported type"); - auto context = rewriter.getContext(); + auto *context = rewriter.getContext(); // get external opaque struct pointer type auto typeStructT = LLVM::LLVMStructType::getOpaque("ompi_predefined_datatype_t", context); @@ -383,7 +383,7 @@ class OMPIImplTraits : public MPIImplTraits { op = "ompi_mpi_replace"; break; } - auto context = rewriter.getContext(); + auto *context = rewriter.getContext(); // get external opaque struct pointer type auto opStructT = LLVM::LLVMStructType::getOpaque("ompi_predefined_op_t", context); From 48babe193186248e1c386a847047d59ab61c762d Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Sun, 14 Sep 2025 09:05:28 -0400 Subject: [PATCH 250/734] [mlir][LLVM] Add LLVMAddrSpaceAttrInterface and NVVMMemorySpaceAttr (#157339) This patch introduces the `LLVMAddrSpaceAttrInterface` for defining compatible LLVM address space attributes To test this interface, this patch also adds: - Adds NVVMMemorySpaceAttr implementing both LLVMAddrSpaceAttrInterface and MemorySpaceAttrInterface - Converts NVVM memory space constants from enum to MLIR enums - Updates all NVVM memory space references to use new attribute system - Adds support for NVVM memory spaces in ptr dialect translation Example: ```mlir llvm.func @nvvm_ptr_address_space( !ptr.ptr<#nvvm.memory_space>, !ptr.ptr<#nvvm.memory_space>, !ptr.ptr<#nvvm.memory_space>, !ptr.ptr<#nvvm.memory_space>, !ptr.ptr<#nvvm.memory_space>, !ptr.ptr<#nvvm.memory_space> ) -> !ptr.ptr<#nvvm.memory_space> ``` Translating the above code to LLVM produces: ```llvm declare ptr @nvvm_ptr_address_space(ptr addrspace(1), ptr addrspace(3), ptr addrspace(4), ptr addrspace(5), ptr addrspace(6), ptr addrspace(7)) ``` To convert the memory space enum to the new enum class use: ```bash grep -r . -e "NVVMMemorySpace::kGenericMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kGenericMemorySpace/NVVMMemorySpace::Generic/g" grep -r . 
-e "NVVMMemorySpace::kGlobalMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kGlobalMemorySpace/NVVMMemorySpace::Global/g" grep -r . -e "NVVMMemorySpace::kSharedMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kSharedMemorySpace/NVVMMemorySpace::Shared/g" grep -r . -e "NVVMMemorySpace::kConstantMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kConstantMemorySpace/NVVMMemorySpace::Constant/g" grep -r . -e "NVVMMemorySpace::kLocalMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kLocalMemorySpace/NVVMMemorySpace::Local/g" grep -r . -e "NVVMMemorySpace::kTensorMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kTensorMemorySpace/NVVMMemorySpace::Tensor/g" grep -r . -e "NVVMMemorySpace::kSharedClusterMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kSharedClusterMemorySpace/NVVMMemorySpace::SharedCluster/g" ``` NOTE: A future patch will add support for ROCDL, it wasn't added here to keep the patch small. --- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 3 +- .../Transforms/CUFGPUToLLVMConversion.cpp | 3 +- .../mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 1 + mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h | 8 ++ .../mlir/Dialect/LLVMIR/LLVMInterfaces.td | 18 ++++ .../include/mlir/Dialect/LLVMIR/NVVMDialect.h | 35 +++---- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 37 +++++++- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 10 +- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 10 +- .../GPU/TransformOps/GPUTransformOps.cpp | 8 +- mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp | 14 +-- mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 6 +- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 95 +++++++++++++++---- .../NVGPU/TransformOps/NVGPUTransformOps.cpp | 8 +- .../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 4 +- mlir/lib/Target/LLVMIR/TypeToLLVM.cpp | 5 +- mlir/test/Dialect/LLVMIR/nvvm.mlir | 10 ++ mlir/test/Target/LLVMIR/ptr.mlir | 22 +++++ 18 files changed, 220 insertions(+), 77 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp 
b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 0800ed4db8c31..008f7099f58b4 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3211,7 +3211,8 @@ struct GlobalOpConversion : public fir::FIROpConversion { if (global.getDataAttr() && *global.getDataAttr() == cuf::DataAttribute::Shared) - g.setAddrSpace(mlir::NVVM::NVVMMemorySpace::kSharedMemorySpace); + g.setAddrSpace( + static_cast(mlir::NVVM::NVVMMemorySpace::Shared)); rewriter.eraseOp(global); return mlir::success(); diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp index a40ed95391c3a..40f180a8c1657 100644 --- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp @@ -221,7 +221,8 @@ static mlir::Value createAddressOfOp(mlir::ConversionPatternRewriter &rewriter, gpu::GPUModuleOp gpuMod, std::string &sharedGlobalName) { auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get( - rewriter.getContext(), mlir::NVVM::NVVMMemorySpace::kSharedMemorySpace); + rewriter.getContext(), + static_cast(mlir::NVVM::NVVMMemorySpace::Shared)); if (auto g = gpuMod.lookupSymbol(sharedGlobalName)) return mlir::LLVM::AddressOfOp::create(rewriter, loc, llvmPtrTy, g.getSymName()); diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index a8c9ef790cfbd..75bce6b0a0e54 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -30,6 +30,7 @@ class LLVM_Attr ]> { let summary = "LLVM address space"; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h index fafccf304e1b4..ce62f0751d876 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h @@ -93,6 +93,14 @@ class TBAANodeAttr : public Attribute { using cconv::CConv; using 
linkage::Linkage; using tailcallkind::TailCallKind; + +namespace detail { +/// Checks whether the given type is an LLVM type that can be loaded or stored. +bool isValidLoadStoreImpl(Type type, ptr::AtomicOrdering ordering, + std::optional alignment, + const ::mlir::DataLayout *dataLayout, + function_ref emitError); +} // namespace detail } // namespace LLVM } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td index 60235bcb35561..e05fb6a9bac7d 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td @@ -533,6 +533,24 @@ def LLVM_DIRecursiveTypeAttrInterface ]; } +def LLVM_LLVMAddrSpaceAttrInterface : + AttrInterface<"LLVMAddrSpaceAttrInterface"> { + let description = [{ + An interface for attributes that represent LLVM address spaces. + Implementing attributes should provide access to the address space value + as an unsigned integer. + }]; + let cppNamespace = "::mlir::LLVM"; + let methods = [ + InterfaceMethod< + /*description=*/"Returns the address space as an unsigned integer.", + /*retTy=*/"unsigned", + /*methodName=*/"getAddressSpace", + /*args=*/(ins) + > + ]; +} + def LLVM_TargetAttrInterface : AttrInterface<"TargetAttrInterface", [DLTIQueryInterface]> { let description = [{ diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h index 6137bb087c576..6bd582d66ed25 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/NVVMRequiresSMTraits.h" +#include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/Interfaces/InferIntRangeInterface.h" @@ -30,31 +31,23 @@ namespace mlir { namespace 
NVVM { +/// Utility functions to compare NVVMMemorySpace with unsigned values. +inline bool operator==(unsigned as, NVVMMemorySpace memSpace) { + return as == static_cast(memSpace); +} +inline bool operator==(NVVMMemorySpace memSpace, unsigned as) { + return static_cast(memSpace) == as; +} +inline bool operator!=(unsigned as, NVVMMemorySpace memSpace) { + return as != static_cast(memSpace); +} +inline bool operator!=(NVVMMemorySpace memSpace, unsigned as) { + return static_cast(memSpace) != as; +} // Shared memory has 128-bit alignment constexpr int kSharedMemoryAlignmentBit = 128; -/// NVVM memory space identifiers. -enum NVVMMemorySpace { - /// Generic memory space identifier. - kGenericMemorySpace = 0, - /// Global memory space identifier. - kGlobalMemorySpace = 1, - /// Shared memory space identifier. - kSharedMemorySpace = 3, - /// Constant memory space identifier. - kConstantMemorySpace = 4, - /// Local memory space identifier. - kLocalMemorySpace = 5, - /// Tensor memory space identifier. - /// Tensor memory is available only in arch-accelerated - /// variants from sm100 onwards. - kTensorMemorySpace = 6, - /// Distributed shared memory space identifier. - /// Distributed shared memory is available only in sm90+. - kSharedClusterMemorySpace = 7, -}; - /// A pair type of LLVM's Intrinsic ID and args (which are llvm values). /// This type is returned by the getIntrinsicIDAndArgs() methods. 
using IDArgPair = diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 854b4d26b4368..70ab1df876d35 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -17,6 +17,7 @@ include "mlir/IR/EnumAttr.td" include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Dialect/LLVMIR/NVVMRequiresSMTraits.td" +include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td" include "mlir/Interfaces/InferIntRangeInterface.td" @@ -192,6 +193,40 @@ def CacheEvictionPriorityAttr : EnumAttr; +/// Global memory space identifier. +def MemSpaceGlobal : I32EnumCase<"Global", 1, "global">; +/// Shared memory space identifier. +def MemSpaceShared : I32EnumCase<"Shared", 3, "shared">; +/// Constant memory space identifier. +def MemSpaceConstant : I32EnumCase<"Constant", 4, "constant">; +/// Local memory space identifier. +def MemSpaceLocal : I32EnumCase<"Local", 5, "local">; +/// Tensor memory space identifier. +/// Tensor memory is available only in arch-accelerated +/// variants from sm100 onwards. +def MemSpaceTensor : I32EnumCase<"Tensor", 6, "tensor">; +/// Distributed shared memory space identifier. +/// Distributed shared memory is available only in sm90+. 
+def MemSpaceSharedCluster : I32EnumCase<"SharedCluster", 7, "shared_cluster">; + +def NVVMMemorySpace : I32Enum<"NVVMMemorySpace", "NVVM Memory Space", + [MemSpaceGeneric, MemSpaceGlobal, MemSpaceShared, + MemSpaceConstant, MemSpaceLocal, MemSpaceTensor, + MemSpaceSharedCluster]> { + let cppNamespace = "::mlir::NVVM"; +} + +def NVVMMemorySpaceAttr : + EnumAttr, + DeclareAttrInterfaceMethods + ]> { + let assemblyFormat = "`<` $value `>`"; +} + //===----------------------------------------------------------------------===// // NVVM intrinsic operations //===----------------------------------------------------------------------===// @@ -3592,7 +3627,7 @@ def NVVM_MapaOp: NVVM_Op<"mapa", string llvmBuilder = [{ int addrSpace = llvm::cast(op.getA().getType()).getAddressSpace(); - bool isSharedMemory = addrSpace == NVVM::NVVMMemorySpace::kSharedMemorySpace; + bool isSharedMemory = addrSpace == static_cast (NVVM::NVVMMemorySpace::Shared); auto intId = isSharedMemory? llvm::Intrinsic::nvvm_mapa_shared_cluster : llvm::Intrinsic::nvvm_mapa; $res = createIntrinsicCall(builder, intId, {$a, $b}); diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 93e370d91e6b9..76a7e0f3831a2 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -451,16 +451,14 @@ void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) { converter, [](gpu::AddressSpace space) -> unsigned { switch (space) { case gpu::AddressSpace::Global: - return static_cast( - NVVM::NVVMMemorySpace::kGlobalMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Global); case gpu::AddressSpace::Workgroup: - return static_cast( - NVVM::NVVMMemorySpace::kSharedMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Shared); case gpu::AddressSpace::Private: return 0; } llvm_unreachable("unknown address space enum value"); - return 0; + return 
static_cast(NVVM::NVVMMemorySpace::Generic); }); // Lowering for MMAMatrixType. converter.addConversion([&](gpu::MMAMatrixType type) -> Type { @@ -648,7 +646,7 @@ void mlir::populateGpuToNVVMConversionPatterns( GPUFuncOpLoweringOptions{ /*allocaAddrSpace=*/0, /*workgroupAddrSpace=*/ - static_cast(NVVM::NVVMMemorySpace::kSharedMemorySpace), + static_cast(NVVM::NVVMMemorySpace::Shared), StringAttr::get(&converter.getContext(), NVVM::NVVMDialect::getKernelFuncAttrName()), StringAttr::get(&converter.getContext(), diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index 37d12bad298df..b7e3491117e9b 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -405,16 +405,14 @@ struct ConvertNVGPUToNVVMPass converter, [](gpu::AddressSpace space) -> unsigned { switch (space) { case gpu::AddressSpace::Global: - return static_cast( - NVVM::NVVMMemorySpace::kGlobalMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Global); case gpu::AddressSpace::Workgroup: - return static_cast( - NVVM::NVVMMemorySpace::kSharedMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Shared); case gpu::AddressSpace::Private: return 0; } llvm_unreachable("unknown address space enum value"); - return 0; + return static_cast(NVVM::NVVMMemorySpace::Generic); }); /// device-side async tokens cannot be materialized in nvvm. We just /// convert them to a dummy i32 type in order to easily drop them during @@ -677,7 +675,7 @@ struct NVGPUAsyncCopyLowering adaptor.getSrcIndices()); // Intrinsics takes a global pointer so we need an address space cast. 
auto srcPointerGlobalType = LLVM::LLVMPointerType::get( - op->getContext(), NVVM::NVVMMemorySpace::kGlobalMemorySpace); + op->getContext(), static_cast(NVVM::NVVMMemorySpace::Global)); scrPtr = LLVM::AddrSpaceCastOp::create(b, srcPointerGlobalType, scrPtr); int64_t dstElements = adaptor.getDstElements().getZExtValue(); int64_t sizeInBytes = diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index c766539f9d91a..2561f6606067f 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -71,16 +71,14 @@ void transform::ApplyGPUToNVVMConversionPatternsOp::populatePatterns( llvmTypeConverter, [](AddressSpace space) -> unsigned { switch (space) { case AddressSpace::Global: - return static_cast( - NVVM::NVVMMemorySpace::kGlobalMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Global); case AddressSpace::Workgroup: - return static_cast( - NVVM::NVVMMemorySpace::kSharedMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Shared); case AddressSpace::Private: return 0; } llvm_unreachable("unknown address space enum value"); - return 0; + return static_cast(NVVM::NVVMMemorySpace::Generic); }); // Used in GPUToNVVM/WmmaOpsToNvvm.cpp so attaching here for now. // TODO: We should have a single to_nvvm_type_converter. diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp index 23610fbd657fd..b8331e0068880 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp @@ -57,10 +57,10 @@ void LLVMDialect::registerAttributes() { //===----------------------------------------------------------------------===// /// Checks whether the given type is an LLVM type that can be loaded or stored. 
-static bool isValidLoadStoreImpl(Type type, ptr::AtomicOrdering ordering, - std::optional alignment, - const ::mlir::DataLayout *dataLayout, - function_ref emitError) { +bool LLVM::detail::isValidLoadStoreImpl( + Type type, ptr::AtomicOrdering ordering, std::optional alignment, + const ::mlir::DataLayout *dataLayout, + function_ref emitError) { if (!isLoadableType(type)) { if (emitError) emitError() << "type must be LLVM type with size, but got " << type; @@ -87,14 +87,16 @@ bool AddressSpaceAttr::isValidLoad( Type type, ptr::AtomicOrdering ordering, std::optional alignment, const ::mlir::DataLayout *dataLayout, function_ref emitError) const { - return isValidLoadStoreImpl(type, ordering, alignment, dataLayout, emitError); + return detail::isValidLoadStoreImpl(type, ordering, alignment, dataLayout, + emitError); } bool AddressSpaceAttr::isValidStore( Type type, ptr::AtomicOrdering ordering, std::optional alignment, const ::mlir::DataLayout *dataLayout, function_ref emitError) const { - return isValidLoadStoreImpl(type, ordering, alignment, dataLayout, emitError); + return detail::isValidLoadStoreImpl(type, ordering, alignment, dataLayout, + emitError); } bool AddressSpaceAttr::isValidAtomicOp( diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index 2dd0132a65bc4..01a16ce2d8a7f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -703,14 +703,14 @@ const llvm::fltSemantics &LLVMPPCFP128Type::getFloatSemantics() const { //===----------------------------------------------------------------------===// /// Check whether type is a compatible ptr type. These are pointer-like types -/// with no element type, no metadata, and using the LLVM AddressSpaceAttr -/// memory space. +/// with no element type, no metadata, and using the LLVM +/// LLVMAddrSpaceAttrInterface memory space. 
static bool isCompatiblePtrType(Type type) { auto ptrTy = dyn_cast(type); if (!ptrTy) return false; return !ptrTy.hasPtrMetadata() && ptrTy.getElementType() == nullptr && - isa(ptrTy.getMemorySpace()); + isa(ptrTy.getMemorySpace()); } bool mlir::LLVM::isCompatibleOuterType(Type type) { diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index e364475251901..9dba0848eba94 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -800,8 +800,8 @@ inferMMATypeFromMNK(NVVM::MMATypes type, NVVM::MMAFrag frag, int m, int n, LogicalResult NVVM::WMMALoadOp::verify() { unsigned addressSpace = llvm::cast(getPtr().getType()).getAddressSpace(); - if (addressSpace != 0 && addressSpace != NVVM::kGlobalMemorySpace && - addressSpace != NVVM::kSharedMemorySpace) + if (addressSpace != 0 && addressSpace != NVVMMemorySpace::Global && + addressSpace != NVVMMemorySpace::Shared) return emitOpError("expected source pointer in memory " "space 0, 1, 3"); @@ -821,8 +821,8 @@ LogicalResult NVVM::WMMALoadOp::verify() { LogicalResult NVVM::WMMAStoreOp::verify() { unsigned addressSpace = llvm::cast(getPtr().getType()).getAddressSpace(); - if (addressSpace != 0 && addressSpace != NVVM::kGlobalMemorySpace && - addressSpace != NVVM::kSharedMemorySpace) + if (addressSpace != 0 && addressSpace != NVVMMemorySpace::Global && + addressSpace != NVVMMemorySpace::Shared) return emitOpError("expected operands to be a source pointer in memory " "space 0, 1, 3"); @@ -1339,8 +1339,8 @@ LogicalResult NVVM::PrefetchOp::verify() { return emitOpError("cannot specify both tensormap and cache level"); if (getTensormap()) { - if (addressSpace != MemSpace::kGenericMemorySpace && - addressSpace != MemSpace::kConstantMemorySpace) { + if (addressSpace != MemSpace::Generic && + addressSpace != MemSpace::Constant) { return emitOpError( "prefetch tensormap requires a generic or constant pointer"); } @@ -1350,15 +1350,14 @@ 
LogicalResult NVVM::PrefetchOp::verify() { "prefetch tensormap does not support eviction priority"); } - if (getInParamSpace() && addressSpace != MemSpace::kGenericMemorySpace) { + if (getInParamSpace() && addressSpace != MemSpace::Generic) { return emitOpError( "in_param_space can only be specified for a generic pointer"); } } else if (cacheLevel) { - if (addressSpace != MemSpace::kGenericMemorySpace && - addressSpace != MemSpace::kGlobalMemorySpace && - addressSpace != MemSpace::kLocalMemorySpace) { + if (addressSpace != MemSpace::Generic && addressSpace != MemSpace::Global && + addressSpace != MemSpace::Local) { return emitOpError("prefetch to cache level requires a generic, global, " "or local pointer"); } @@ -1370,7 +1369,7 @@ LogicalResult NVVM::PrefetchOp::verify() { "cache level is L1"); } - if (addressSpace != MemSpace::kGenericMemorySpace) { + if (addressSpace != MemSpace::Generic) { return emitOpError( "prefetch to uniform cache requires a generic pointer"); } @@ -1381,7 +1380,7 @@ LogicalResult NVVM::PrefetchOp::verify() { return emitOpError( "cache eviction priority supported only for cache level L2"); - if (addressSpace != MemSpace::kGlobalMemorySpace) + if (addressSpace != MemSpace::Global) return emitOpError("cache eviction priority requires a global pointer"); if (*evictPriority != NVVM::CacheEvictionPriority::EvictNormal && @@ -1796,7 +1795,7 @@ Tcgen05AllocOp::getIntrinsicIDAndArgs(Operation &op, auto curOp = cast(op); unsigned as = llvm::cast(curOp.getAddr().getType()) .getAddressSpace(); - bool isShared = as == NVVMMemorySpace::kSharedMemorySpace; + bool isShared = as == NVVMMemorySpace::Shared; bool is2CTAMode = curOp.getGroup() == CTAGroupKind::CTA_2; llvm::Intrinsic::ID id; @@ -1845,7 +1844,7 @@ Tcgen05CommitOp::getIntrinsicIDAndArgs(Operation &op, auto curOp = cast(op); unsigned as = llvm::cast(curOp.getAddr().getType()) .getAddressSpace(); - bool isShared = as == NVVMMemorySpace::kSharedMemorySpace; + bool isShared = as == 
NVVMMemorySpace::Shared; bool hasMulticast = static_cast(curOp.getMulticastMask()); bool is2CTAMode = curOp.getGroup() == CTAGroupKind::CTA_2; @@ -2051,18 +2050,18 @@ PrefetchOp::getIntrinsicIDAndArgs(NVVM::PrefetchOp &op, } } - switch (addressSpace) { - case MemSpace::kGenericMemorySpace: + switch (static_cast(addressSpace)) { + case MemSpace::Generic: return *cacheLevel == CacheLevel::L1 ? NVVM::IDArgPair({llvm::Intrinsic::nvvm_prefetch_L1, args}) : NVVM::IDArgPair({llvm::Intrinsic::nvvm_prefetch_L2, args}); - case MemSpace::kGlobalMemorySpace: + case MemSpace::Global: return *cacheLevel == CacheLevel::L1 ? NVVM::IDArgPair( {llvm::Intrinsic::nvvm_prefetch_global_L1, args}) : NVVM::IDArgPair( {llvm::Intrinsic::nvvm_prefetch_global_L2, args}); - case MemSpace::kLocalMemorySpace: + case MemSpace::Local: return *cacheLevel == CacheLevel::L1 ? NVVM::IDArgPair( {llvm::Intrinsic::nvvm_prefetch_local_L1, args}) @@ -2185,6 +2184,66 @@ LogicalResult NVVMDialect::verifyRegionArgAttribute(Operation *op, return success(); } +//===----------------------------------------------------------------------===// +// NVVM Address Space Attr +//===----------------------------------------------------------------------===// + +unsigned NVVMMemorySpaceAttr::getAddressSpace() const { + return static_cast(getValue()); +} + +bool NVVMMemorySpaceAttr::isValidLoad( + Type type, ptr::AtomicOrdering ordering, std::optional alignment, + const ::mlir::DataLayout *dataLayout, + function_ref emitError) const { + return LLVM::detail::isValidLoadStoreImpl(type, ordering, alignment, + dataLayout, emitError); +} + +bool NVVMMemorySpaceAttr::isValidStore( + Type type, ptr::AtomicOrdering ordering, std::optional alignment, + const ::mlir::DataLayout *dataLayout, + function_ref emitError) const { + return LLVM::detail::isValidLoadStoreImpl(type, ordering, alignment, + dataLayout, emitError); +} + +bool NVVMMemorySpaceAttr::isValidAtomicOp( + ptr::AtomicBinOp op, Type type, ptr::AtomicOrdering ordering, + 
std::optional alignment, const ::mlir::DataLayout *dataLayout, + function_ref emitError) const { + // TODO: update this method once `ptr.atomic_rmw` is implemented. + assert(false && "unimplemented, see TODO in the source."); + return false; +} + +bool NVVMMemorySpaceAttr::isValidAtomicXchg( + Type type, ptr::AtomicOrdering successOrdering, + ptr::AtomicOrdering failureOrdering, std::optional alignment, + const ::mlir::DataLayout *dataLayout, + function_ref emitError) const { + // TODO: update this method once `ptr.atomic_cmpxchg` is implemented. + assert(false && "unimplemented, see TODO in the source."); + return false; +} + +bool NVVMMemorySpaceAttr::isValidAddrSpaceCast( + Type tgt, Type src, function_ref emitError) const { + // TODO: update this method once the `ptr.addrspace_cast` op is added to the + // dialect. + assert(false && "unimplemented, see TODO in the source."); + return false; +} + +bool NVVMMemorySpaceAttr::isValidPtrIntCast( + Type intLikeTy, Type ptrLikeTy, + function_ref emitError) const { + // TODO: update this method once the int-cast ops are added to the `ptr` + // dialect. + assert(false && "unimplemented, see TODO in the source."); + return false; +} + //===----------------------------------------------------------------------===// // NVVM target attribute. 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp index bc3e8b2b17fb1..46e82bd8fc8c8 100644 --- a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp +++ b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp @@ -53,16 +53,14 @@ void transform::ApplyNVGPUToNVVMConversionPatternsOp::populatePatterns( llvmTypeConverter, [](gpu::AddressSpace space) -> unsigned { switch (space) { case gpu::AddressSpace::Global: - return static_cast( - NVVM::NVVMMemorySpace::kGlobalMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Global); case gpu::AddressSpace::Workgroup: - return static_cast( - NVVM::NVVMMemorySpace::kSharedMemorySpace); + return static_cast(NVVM::NVVMMemorySpace::Shared); case gpu::AddressSpace::Private: return 0; } llvm_unreachable("unknown address space enum value"); - return 0; + return static_cast(NVVM::NVVMMemorySpace::Generic); }); llvmTypeConverter.addConversion( [&](nvgpu::DeviceAsyncTokenType type) -> Type { diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp index 7f69af14df338..3d86b09b32538 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp @@ -253,8 +253,8 @@ getStMatrixIntrinsicId(NVVM::MMALayout layout, int32_t num, /// Return the intrinsic ID associated with st.bulk for the given address type. static llvm::Intrinsic::ID getStBulkIntrinsicId(LLVM::LLVMPointerType addrType) { - bool isSharedMemory = - addrType.getAddressSpace() == NVVM::NVVMMemorySpace::kSharedMemorySpace; + bool isSharedMemory = addrType.getAddressSpace() == + static_cast(NVVM::NVVMMemorySpace::Shared); return isSharedMemory ? 
llvm::Intrinsic::nvvm_st_bulk_shared_cta : llvm::Intrinsic::nvvm_st_bulk; } diff --git a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp index ddd5946ce5d63..4d204744450a8 100644 --- a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp +++ b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp @@ -152,8 +152,9 @@ class TypeToLLVMIRTranslatorImpl { /// Translates the given ptr type. llvm::Type *translate(PtrLikeTypeInterface type) { - auto memSpace = dyn_cast(type.getMemorySpace()); - assert(memSpace && "expected pointer with the LLVM address space"); + auto memSpace = + dyn_cast(type.getMemorySpace()); + assert(memSpace && "expected pointer with an LLVM address space"); assert(!type.hasPtrMetadata() && "expected pointer without metadata"); return llvm::PointerType::get(context, memSpace.getAddressSpace()); } diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 5209b3c1d7906..3277e62893527 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -620,6 +620,16 @@ func.func @prefetch_tensormap(%gen_ptr: !llvm.ptr, %const_ptr: !llvm.ptr<4>) { return } +// CHECK-LABEL: @nvvm_address_space +func.func private @nvvm_address_space( + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space> + ) -> !ptr.ptr<#nvvm.memory_space> + // ----- // Just check these don't emit errors. 
diff --git a/mlir/test/Target/LLVMIR/ptr.mlir b/mlir/test/Target/LLVMIR/ptr.mlir index 4b29be813fa81..9b99dd8e3a3eb 100644 --- a/mlir/test/Target/LLVMIR/ptr.mlir +++ b/mlir/test/Target/LLVMIR/ptr.mlir @@ -233,3 +233,25 @@ llvm.func @ptr_add_vector_base_scalar_offset(%ptrs: vector<4x!ptr.ptr<#llvm.addr %res = ptr.ptr_add %ptrs, %offset : vector<4x!ptr.ptr<#llvm.address_space<0>>>, i32 llvm.return %res : vector<4x!ptr.ptr<#llvm.address_space<0>>> } + +// CHECK-LABEL: declare ptr @nvvm_ptr_address_space(ptr addrspace(1), ptr addrspace(3), ptr addrspace(4), ptr addrspace(5), ptr addrspace(6), ptr addrspace(7)) +llvm.func @nvvm_ptr_address_space( + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space>, + !ptr.ptr<#nvvm.memory_space> + ) -> !ptr.ptr<#nvvm.memory_space> + +// CHECK-LABEL: define void @llvm_ops_with_ptr_nvvm_values +// CHECK-SAME: (ptr %[[ARG:.*]]) { +// CHECK-NEXT: %[[V0:.*]] = load ptr addrspace(1), ptr %[[ARG]], align 8 +// CHECK-NEXT: store ptr addrspace(1) %[[V0]], ptr %[[ARG]], align 8 +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @llvm_ops_with_ptr_nvvm_values(%arg0: !llvm.ptr) { + %1 = llvm.load %arg0 : !llvm.ptr -> !ptr.ptr<#nvvm.memory_space> + llvm.store %1, %arg0 : !ptr.ptr<#nvvm.memory_space>, !llvm.ptr + llvm.return +} From 24f836a8aef0a55be4c93e9b7e0fcf4bbcd00ebb Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 21 Aug 2025 09:32:55 -0700 Subject: [PATCH 251/734] [MLIR] Apply clang-tidy fixes for llvm-else-after-return in TosaProfileCompliance.cpp (NFC) --- mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp index 9543fa1fe39d8..20f9333e7c951 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp +++ 
b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp @@ -592,7 +592,8 @@ llvm::SmallString<7> TosaProfileCompliance::stringifyTypeInfo(const TypeInfo &typeInfo) { if (typeInfo.typeID == mlir::IntegerType::getTypeID()) { return {"i" + llvm::utostr(typeInfo.bitWidth)}; - } else if (typeInfo.typeID == mlir::Float16Type::getTypeID()) { + } + if (typeInfo.typeID == mlir::Float16Type::getTypeID()) { return {"f16"}; } else if (typeInfo.typeID == mlir::Float32Type::getTypeID()) { return {"f32"}; From 3336c6cc2c7e729f6b292ba3929b6c083fcff21b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 07:07:53 -0700 Subject: [PATCH 252/734] [ADT] Fix comment typos in DenseMap.h (#158457) - The math is wrong. - We are discussing "inequality". - "For example" requires ",". --- llvm/include/llvm/ADT/DenseMap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index f076049c55a26..23b672eaf8b47 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -372,8 +372,8 @@ class DenseMapBase : public DebugEpochBase { // Ensure that "NumEntries * 4 < NumBuckets * 3" if (NumEntries == 0) return 0; - // +1 is required because of the strict equality. - // For example if NumEntries is 48, we need to return 401. + // +1 is required because of the strict inequality. + // For example, if NumEntries is 48, we need to return 128. 
return NextPowerOf2(NumEntries * 4 / 3 + 1); } From 0e79732289cdd1d8e19a2151b780a4f02f814e2e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 07:08:01 -0700 Subject: [PATCH 253/734] [llvm] Proofread Atomics.rst (#158459) --- llvm/docs/Atomics.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/docs/Atomics.rst b/llvm/docs/Atomics.rst index 4dee3e6bd9f4f..522aed150bf62 100644 --- a/llvm/docs/Atomics.rst +++ b/llvm/docs/Atomics.rst @@ -43,8 +43,8 @@ address, the first store can be erased. This transformation is not allowed for a pair of volatile stores. On the other hand, a non-volatile non-atomic load can be moved across a volatile load freely, but not an Acquire load. -This document is intended to provide a guide to anyone either writing a frontend -for LLVM or working on optimization passes for LLVM with a guide for how to deal +This document is intended to guide anyone writing a frontend +for LLVM or working on optimization passes for LLVM on how to deal with instructions with special semantics in the presence of concurrency. This is not intended to be a precise guide to the semantics; the details can get extremely complicated and unreadable, and are not usually necessary. @@ -94,7 +94,7 @@ The following is equivalent in non-concurrent situations: However, LLVM is not allowed to transform the former to the latter: it could indirectly introduce undefined behavior if another thread can access ``x`` at -the same time. That thread would read `undef` instead of the value it was +the same time. That thread would read ``undef`` instead of the value it was expecting, which can lead to undefined behavior down the line. (This example is particularly of interest because before the concurrency model was implemented, LLVM would perform this transformation.) @@ -149,7 +149,7 @@ NotAtomic NotAtomic is the obvious, a load or store which is not atomic. 
(This isn't really a level of atomicity, but is listed here for comparison.) This is essentially a regular load or store. If there is a race on a given memory -location, loads from that location return undef. +location, loads from that location return ``undef``. Relevant standard This is intended to match shared variables in C/C++, and to be used in any @@ -429,7 +429,7 @@ support *ALL* operations of that size in a lock-free manner. When the target implements atomic ``cmpxchg`` or LL/SC instructions (as most do) this is trivial: all the other operations can be implemented on top of those -primitives. However, on many older CPUs (e.g. ARMv5, SparcV8, Intel 80386) there +primitives. However, on many older CPUs (e.g. ARMv5, Sparc V8, Intel 80386) there are atomic load and store instructions, but no ``cmpxchg`` or LL/SC. As it is invalid to implement ``atomic load`` using the native instruction, but ``cmpxchg`` using a library call to a function that uses a mutex, ``atomic @@ -475,7 +475,7 @@ atomic constructs. Here are some lowerings it can do: ``shouldExpandAtomicRMWInIR``, ``emitMaskedAtomicRMWIntrinsic``, ``shouldExpandAtomicCmpXchgInIR``, and ``emitMaskedAtomicCmpXchgIntrinsic``. -For an example of these look at the ARM (first five lowerings) or RISC-V (last +For an example of these, look at the ARM (first five lowerings) or RISC-V (last lowering) backend. AtomicExpandPass supports two strategies for lowering atomicrmw/cmpxchg to @@ -542,7 +542,7 @@ to take note of: - They support all sizes and alignments -- including those which cannot be implemented natively on any existing hardware. Therefore, they will certainly - use mutexes in for some sizes/alignments. + use mutexes for some sizes/alignments. 
- As a consequence, they cannot be shipped in a statically linked compiler-support library, as they have state which must be shared amongst all @@ -568,7 +568,7 @@ Libcalls: __sync_* Some targets or OS/target combinations can support lock-free atomics, but for various reasons, it is not practical to emit the instructions inline. -There's two typical examples of this. +There are two typical examples of this. Some CPUs support multiple instruction sets which can be switched back and forth on function-call boundaries. For example, MIPS supports the MIPS16 ISA, which @@ -589,7 +589,7 @@ case. The only common architecture without that property is SPARC -- SPARCV8 SMP systems were common, yet it doesn't support any sort of compare-and-swap operation. -Some targets (like RISCV) support a ``+forced-atomics`` target feature, which +Some targets (like RISC-V) support a ``+forced-atomics`` target feature, which enables the use of lock-free atomics even if LLVM is not aware of any specific OS support for them. In this case, the user is responsible for ensuring that necessary ``__sync_*`` implementations are available. Code using @@ -653,6 +653,6 @@ implemented in both ``compiler-rt`` and ``libgcc`` libraries iN __aarch64_ldeorN_ORDER(iN val, iN *ptr) iN __aarch64_ldsetN_ORDER(iN val, iN *ptr) -Please note, if LSE instruction set is specified for AArch64 target then +Please note, if LSE instruction set is specified for AArch64 target, then out-of-line atomics calls are not generated and single-instruction atomic operations are used in place. From ba985b9a7392183bb3a5af707da6256406724509 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 14 Sep 2025 17:38:58 +0300 Subject: [PATCH 254/734] [CSKY][Xtensa] Add missing dependency on TargetParser Became necessary after f3efbce4. 
--- llvm/lib/Target/CSKY/CMakeLists.txt | 1 + llvm/lib/Target/Xtensa/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/lib/Target/CSKY/CMakeLists.txt b/llvm/lib/Target/CSKY/CMakeLists.txt index 4b900bc99c271..433f3c821f9ee 100644 --- a/llvm/lib/Target/CSKY/CMakeLists.txt +++ b/llvm/lib/Target/CSKY/CMakeLists.txt @@ -44,6 +44,7 @@ add_llvm_target(CSKYCodeGen SelectionDAG Support Target + TargetParser ADD_TO_COMPONENT CSKY diff --git a/llvm/lib/Target/Xtensa/CMakeLists.txt b/llvm/lib/Target/Xtensa/CMakeLists.txt index 4fc1ba6dfa650..c698b42b00d10 100644 --- a/llvm/lib/Target/Xtensa/CMakeLists.txt +++ b/llvm/lib/Target/Xtensa/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(XtensaCodeGen SelectionDAG Support Target + TargetParser XtensaDesc XtensaInfo From bfedb4a938438044b1c1b84a2683419f9c7143b4 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 14 Sep 2025 10:39:30 -0400 Subject: [PATCH 255/734] [gn] port b31f8cb1c910 --- llvm/utils/gn/secondary/lldb/test/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn index 6b1cac748558c..e82fe2d11b75d 100644 --- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/test/BUILD.gn @@ -142,6 +142,7 @@ write_lit_cfg("lit_shell_site_cfg") { "LLDB_TOOL_LLDB_SERVER_BUILD=1", "LLDB_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), "LLDB_USE_SYSTEM_DEBUGSERVER=1", # XXX port //lldb/tools/debugserver (?) + "LLVM_ENABLE_DIA_SDK=0", # FIXME: option? just enable on windows? "LLVM_HOST_TRIPLE=$llvm_current_triple", "LLVM_USE_SANITIZER=", "Python3_EXECUTABLE=$python_path", From 1a65e63c596d9459f49e4495e92cdecac2795f71 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Sun, 14 Sep 2025 11:44:59 -0400 Subject: [PATCH 256/734] [mlir][ptr] Add ConstantOp with NullAttr and AddressAttr support (#157347) This patch introduces the `ptr.constant` operation. 
It also adds the `NullAttr` and `AddressAttr` for representing null pointers and raw integer addresses.
--------- Co-authored-by: Mehdi Amini --- .../mlir/Dialect/Ptr/IR/PtrAttrDefs.td | 60 ++++++- mlir/include/mlir/Dialect/Ptr/IR/PtrAttrs.h | 6 + mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td | 39 ++++- mlir/include/mlir/IR/DialectImplementation.h | 7 +- mlir/lib/Dialect/Ptr/IR/PtrDialect.cpp | 6 + .../Dialect/Ptr/PtrToLLVMIRTranslation.cpp | 150 ++++++++++++------ mlir/test/Dialect/Ptr/ops.mlir | 24 ++- mlir/test/Target/LLVMIR/ptr.mlir | 36 ++++- 8 files changed, 262 insertions(+), 66 deletions(-) diff --git a/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrDefs.td b/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrDefs.td index 4542f57a62d79..78006d2dec40d 100644 --- a/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrDefs.td +++ b/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrDefs.td @@ -22,6 +22,34 @@ class Ptr_Attr + ]> { + let summary = "Address attribute"; + let description = [{ + The `address` attribute represents a raw memory address, expressed in bytes. + + Example: + + ```mlir + #ptr.address<0x1000> : !ptr.ptr<#ptr.generic_space> + ``` + }]; + let parameters = (ins AttributeSelfTypeParameter<"", "PtrType">:$type, + APIntParameter<"">:$value); + let builders = [ + AttrBuilderWithInferredContext<(ins "PtrType":$type, + "const llvm::APInt &":$value), [{ + return $_get(type.getContext(), type, value); + }]> + ]; + let assemblyFormat = "`<` $value `>`"; +} + //===----------------------------------------------------------------------===// // GenericSpaceAttr //===----------------------------------------------------------------------===// @@ -37,16 +65,42 @@ def Ptr_GenericSpaceAttr : - Load and store operations are always valid, regardless of the type. - Atomic operations are always valid, regardless of the type. - Cast operations to `generic_space` are always valid. 
- + Example: ```mlir - #ptr.generic_space + #ptr.generic_space : !ptr.ptr<#ptr.generic_space> ``` }]; let assemblyFormat = ""; } +//===----------------------------------------------------------------------===// +// NullAttr +//===----------------------------------------------------------------------===// + +def Ptr_NullAttr : Ptr_Attr<"Null", "null", [ + DeclareAttrInterfaceMethods + ]> { + let summary = "Null pointer attribute"; + let description = [{ + The `null` attribute represents a null pointer. + + Example: + + ```mlir + #ptr.null + ``` + }]; + let parameters = (ins AttributeSelfTypeParameter<"", "PtrType">:$type); + let builders = [ + AttrBuilderWithInferredContext<(ins "PtrType":$type), [{ + return $_get(type.getContext(), type); + }]> + ]; + let assemblyFormat = ""; +} + //===----------------------------------------------------------------------===// // SpecAttr //===----------------------------------------------------------------------===// @@ -62,7 +116,7 @@ def Ptr_SpecAttr : Ptr_Attr<"Spec", "spec"> { - [Optional] index: bitwidth that should be used when performing index computations for the type. Setting the field to `kOptionalSpecValue`, means the field is optional. - + Furthermore, the attribute will verify that all present values are divisible by 8 (number of bits in a byte), and that `preferred` > `abi`. 
diff --git a/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrs.h b/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrs.h index bb01ceaaeea54..c252f9efd0471 100644 --- a/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrs.h +++ b/mlir/include/mlir/Dialect/Ptr/IR/PtrAttrs.h @@ -21,6 +21,12 @@ #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" #include "mlir/Dialect/Ptr/IR/PtrEnums.h" +namespace mlir { +namespace ptr { +class PtrType; +} // namespace ptr +} // namespace mlir + #define GET_ATTRDEF_CLASSES #include "mlir/Dialect/Ptr/IR/PtrOpsAttrs.h.inc" diff --git a/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td b/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td index 3ac12978b947c..468a3004d5c62 100644 --- a/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td +++ b/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td @@ -36,7 +36,7 @@ class Ptr_ShapedValueType allowedTypes, list preds = []> : /*cppType=*/"::mlir::ShapedType">; // A ptr-like type, either scalar or shaped type with value semantics. -def Ptr_PtrLikeType : +def Ptr_PtrLikeType : AnyTypeOf<[Ptr_ShapedValueType<[Ptr_PtrType], [HasRankPred]>, Ptr_PtrType]>; // An int-like type, either scalar or shaped type with value semantics. @@ -57,6 +57,31 @@ def Ptr_Mask1DType : def Ptr_Ptr1DType : Ptr_ShapedValueType<[Ptr_PtrType], [HasAnyRankOfPred<[1]>]>; +//===----------------------------------------------------------------------===// +// ConstantOp +//===----------------------------------------------------------------------===// + +def Ptr_ConstantOp : Pointer_Op<"constant", [ + ConstantLike, Pure, AllTypesMatch<["value", "result"]> + ]> { + let summary = "Pointer constant operation"; + let description = [{ + The `constant` operation produces a pointer constant. The attribute must be + a typed attribute of pointer type. 
+ + Example: + + ```mlir + // Create a null pointer + %null = ptr.constant #ptr.null : !ptr.ptr<#ptr.generic_space> + ``` + }]; + let arguments = (ins TypedAttrInterface:$value); + let results = (outs Ptr_PtrType:$result); + let assemblyFormat = "attr-dict $value"; + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // FromPtrOp //===----------------------------------------------------------------------===// @@ -81,7 +106,7 @@ def Ptr_FromPtrOp : Pointer_Op<"from_ptr", [ ```mlir %typed_ptr = ptr.from_ptr %ptr : !ptr.ptr<#ptr.generic_space> -> !my.ptr %memref = ptr.from_ptr %ptr metadata %md : !ptr.ptr<#ptr.generic_space> -> memref - + // Cast the `%ptr` to a memref without utilizing metadata. %memref = ptr.from_ptr %ptr : !ptr.ptr<#ptr.generic_space> -> memref ``` @@ -361,13 +386,13 @@ def Ptr_PtrAddOp : Pointer_Op<"ptr_add", [ // Scalar base and offset %x_off = ptr.ptr_add %x, %off : !ptr.ptr<#ptr.generic_space>, i32 %x_off0 = ptr.ptr_add nusw %x, %off : !ptr.ptr<#ptr.generic_space>, i32 - + // Shaped base with scalar offset %ptrs_off = ptr.ptr_add %ptrs, %off : vector<4x!ptr.ptr<#ptr.generic_space>>, i32 - + // Scalar base with shaped offset %x_offs = ptr.ptr_add %x, %offs : !ptr.ptr<#ptr.generic_space>, vector<4xi32> - + // Both base and offset are shaped %ptrs_offs = ptr.ptr_add %ptrs, %offs : vector<4x!ptr.ptr<#ptr.generic_space>>, vector<4xi32> ``` @@ -382,7 +407,7 @@ def Ptr_PtrAddOp : Pointer_Op<"ptr_add", [ }]; let hasFolder = 1; let extraClassDeclaration = [{ - /// `ViewLikeOp::getViewSource` method. + /// `ViewLikeOp::getViewSource` method. Value getViewSource() { return getBase(); } /// Returns the ptr type of the operation. 
@@ -418,7 +443,7 @@ def Ptr_ScatterOp : Pointer_Op<"scatter", [ // Scatter values to multiple memory locations ptr.scatter %value, %ptrs, %mask : vector<4xf32>, vector<4x!ptr.ptr<#ptr.generic_space>> - + // Scatter with alignment ptr.scatter %value, %ptrs, %mask alignment = 8 : vector<4xf32>, vector<4x!ptr.ptr<#ptr.generic_space>> diff --git a/mlir/include/mlir/IR/DialectImplementation.h b/mlir/include/mlir/IR/DialectImplementation.h index f45b88dc6deca..0b4f91cd750b8 100644 --- a/mlir/include/mlir/IR/DialectImplementation.h +++ b/mlir/include/mlir/IR/DialectImplementation.h @@ -103,10 +103,11 @@ struct FieldParser< /// Parse any integer. template -struct FieldParser::value, IntT>> { +struct FieldParser::value || + std::is_same_v), + IntT>> { static FailureOr parse(AsmParser &parser) { - IntT value = 0; + IntT value{}; if (parser.parseInteger(value)) return failure(); return value; diff --git a/mlir/lib/Dialect/Ptr/IR/PtrDialect.cpp b/mlir/lib/Dialect/Ptr/IR/PtrDialect.cpp index 284c998690170..f0209af8a1ca3 100644 --- a/mlir/lib/Dialect/Ptr/IR/PtrDialect.cpp +++ b/mlir/lib/Dialect/Ptr/IR/PtrDialect.cpp @@ -56,6 +56,12 @@ verifyAlignment(std::optional alignment, return success(); } +//===----------------------------------------------------------------------===// +// ConstantOp +//===----------------------------------------------------------------------===// + +OpFoldResult ConstantOp::fold(FoldAdaptor adaptor) { return getValue(); } + //===----------------------------------------------------------------------===// // FromPtrOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/Ptr/PtrToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/Ptr/PtrToLLVMIRTranslation.cpp index d777667022a98..7e610cd42e931 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/Ptr/PtrToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/Ptr/PtrToLLVMIRTranslation.cpp @@ -29,7 +29,7 @@ namespace { /// Converts 
ptr::AtomicOrdering to llvm::AtomicOrdering static llvm::AtomicOrdering -convertAtomicOrdering(ptr::AtomicOrdering ordering) { +translateAtomicOrdering(ptr::AtomicOrdering ordering) { switch (ordering) { case ptr::AtomicOrdering::not_atomic: return llvm::AtomicOrdering::NotAtomic; @@ -49,10 +49,10 @@ convertAtomicOrdering(ptr::AtomicOrdering ordering) { llvm_unreachable("Unknown atomic ordering"); } -/// Convert ptr.ptr_add operation +/// Translate ptr.ptr_add operation to LLVM IR. static LogicalResult -convertPtrAddOp(PtrAddOp ptrAddOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +translatePtrAddOp(PtrAddOp ptrAddOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { llvm::Value *basePtr = moduleTranslation.lookupValue(ptrAddOp.getBase()); llvm::Value *offset = moduleTranslation.lookupValue(ptrAddOp.getOffset()); @@ -83,18 +83,19 @@ convertPtrAddOp(PtrAddOp ptrAddOp, llvm::IRBuilderBase &builder, return success(); } -/// Convert ptr.load operation -static LogicalResult convertLoadOp(LoadOp loadOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +/// Translate ptr.load operation to LLVM IR. +static LogicalResult +translateLoadOp(LoadOp loadOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { llvm::Value *ptr = moduleTranslation.lookupValue(loadOp.getPtr()); if (!ptr) return loadOp.emitError("Failed to lookup pointer operand"); - // Convert result type to LLVM type + // Translate result type to LLVM type llvm::Type *resultType = moduleTranslation.convertType(loadOp.getValue().getType()); if (!resultType) - return loadOp.emitError("Failed to convert result type"); + return loadOp.emitError("Failed to translate result type"); // Create the load instruction. 
llvm::MaybeAlign alignment(loadOp.getAlignment().value_or(0)); @@ -102,7 +103,7 @@ static LogicalResult convertLoadOp(LoadOp loadOp, llvm::IRBuilderBase &builder, resultType, ptr, alignment, loadOp.getVolatile_()); // Set op flags and metadata. - loadInst->setAtomic(convertAtomicOrdering(loadOp.getOrdering())); + loadInst->setAtomic(translateAtomicOrdering(loadOp.getOrdering())); // Set sync scope if specified if (loadOp.getSyncscope().has_value()) { llvm::LLVMContext &ctx = builder.getContext(); @@ -135,10 +136,10 @@ static LogicalResult convertLoadOp(LoadOp loadOp, llvm::IRBuilderBase &builder, return success(); } -/// Convert ptr.store operation +/// Translate ptr.store operation to LLVM IR. static LogicalResult -convertStoreOp(StoreOp storeOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +translateStoreOp(StoreOp storeOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { llvm::Value *value = moduleTranslation.lookupValue(storeOp.getValue()); llvm::Value *ptr = moduleTranslation.lookupValue(storeOp.getPtr()); @@ -151,7 +152,7 @@ convertStoreOp(StoreOp storeOp, llvm::IRBuilderBase &builder, builder.CreateAlignedStore(value, ptr, alignment, storeOp.getVolatile_()); // Set op flags and metadata. - storeInst->setAtomic(convertAtomicOrdering(storeOp.getOrdering())); + storeInst->setAtomic(translateAtomicOrdering(storeOp.getOrdering())); // Set sync scope if specified if (storeOp.getSyncscope().has_value()) { llvm::LLVMContext &ctx = builder.getContext(); @@ -178,21 +179,21 @@ convertStoreOp(StoreOp storeOp, llvm::IRBuilderBase &builder, return success(); } -/// Convert ptr.type_offset operation +/// Translate ptr.type_offset operation to LLVM IR. 
static LogicalResult -convertTypeOffsetOp(TypeOffsetOp typeOffsetOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { - // Convert the element type to LLVM type +translateTypeOffsetOp(TypeOffsetOp typeOffsetOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + // Translate the element type to LLVM type llvm::Type *elementType = moduleTranslation.convertType(typeOffsetOp.getElementType()); if (!elementType) - return typeOffsetOp.emitError("Failed to convert the element type"); + return typeOffsetOp.emitError("Failed to translate the element type"); - // Convert result type + // Translate result type llvm::Type *resultType = moduleTranslation.convertType(typeOffsetOp.getResult().getType()); if (!resultType) - return typeOffsetOp.emitError("Failed to convert the result type"); + return typeOffsetOp.emitError("Failed to translate the result type"); // Use GEP with null pointer to compute type size/offset. llvm::Value *nullPtr = llvm::Constant::getNullValue(builder.getPtrTy(0)); @@ -204,10 +205,10 @@ convertTypeOffsetOp(TypeOffsetOp typeOffsetOp, llvm::IRBuilderBase &builder, return success(); } -/// Convert ptr.gather operation +/// Translate ptr.gather operation to LLVM IR. static LogicalResult -convertGatherOp(GatherOp gatherOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +translateGatherOp(GatherOp gatherOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { llvm::Value *ptrs = moduleTranslation.lookupValue(gatherOp.getPtrs()); llvm::Value *mask = moduleTranslation.lookupValue(gatherOp.getMask()); llvm::Value *passthrough = @@ -216,11 +217,11 @@ convertGatherOp(GatherOp gatherOp, llvm::IRBuilderBase &builder, if (!ptrs || !mask || !passthrough) return gatherOp.emitError("Failed to lookup operands"); - // Convert result type to LLVM type. + // Translate result type to LLVM type. 
llvm::Type *resultType = moduleTranslation.convertType(gatherOp.getResult().getType()); if (!resultType) - return gatherOp.emitError("Failed to convert result type"); + return gatherOp.emitError("Failed to translate result type"); // Get the alignment. llvm::MaybeAlign alignment(gatherOp.getAlignment().value_or(0)); @@ -233,10 +234,10 @@ convertGatherOp(GatherOp gatherOp, llvm::IRBuilderBase &builder, return success(); } -/// Convert ptr.masked_load operation +/// Translate ptr.masked_load operation to LLVM IR. static LogicalResult -convertMaskedLoadOp(MaskedLoadOp maskedLoadOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +translateMaskedLoadOp(MaskedLoadOp maskedLoadOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { llvm::Value *ptr = moduleTranslation.lookupValue(maskedLoadOp.getPtr()); llvm::Value *mask = moduleTranslation.lookupValue(maskedLoadOp.getMask()); llvm::Value *passthrough = @@ -245,11 +246,11 @@ convertMaskedLoadOp(MaskedLoadOp maskedLoadOp, llvm::IRBuilderBase &builder, if (!ptr || !mask || !passthrough) return maskedLoadOp.emitError("Failed to lookup operands"); - // Convert result type to LLVM type. + // Translate result type to LLVM type. llvm::Type *resultType = moduleTranslation.convertType(maskedLoadOp.getResult().getType()); if (!resultType) - return maskedLoadOp.emitError("Failed to convert result type"); + return maskedLoadOp.emitError("Failed to translate result type"); // Get the alignment. llvm::MaybeAlign alignment(maskedLoadOp.getAlignment().value_or(0)); @@ -262,10 +263,11 @@ convertMaskedLoadOp(MaskedLoadOp maskedLoadOp, llvm::IRBuilderBase &builder, return success(); } -/// Convert ptr.masked_store operation +/// Translate ptr.masked_store operation to LLVM IR. 
static LogicalResult -convertMaskedStoreOp(MaskedStoreOp maskedStoreOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +translateMaskedStoreOp(MaskedStoreOp maskedStoreOp, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { llvm::Value *value = moduleTranslation.lookupValue(maskedStoreOp.getValue()); llvm::Value *ptr = moduleTranslation.lookupValue(maskedStoreOp.getPtr()); llvm::Value *mask = moduleTranslation.lookupValue(maskedStoreOp.getMask()); @@ -281,10 +283,10 @@ convertMaskedStoreOp(MaskedStoreOp maskedStoreOp, llvm::IRBuilderBase &builder, return success(); } -/// Convert ptr.scatter operation +/// Translate ptr.scatter operation to LLVM IR. static LogicalResult -convertScatterOp(ScatterOp scatterOp, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +translateScatterOp(ScatterOp scatterOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { llvm::Value *value = moduleTranslation.lookupValue(scatterOp.getValue()); llvm::Value *ptrs = moduleTranslation.lookupValue(scatterOp.getPtrs()); llvm::Value *mask = moduleTranslation.lookupValue(scatterOp.getMask()); @@ -300,7 +302,56 @@ convertScatterOp(ScatterOp scatterOp, llvm::IRBuilderBase &builder, return success(); } -/// Implementation of the dialect interface that converts operations belonging +/// Translate ptr.constant operation to LLVM IR. 
+static LogicalResult +translateConstantOp(ConstantOp constantOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + // Translate result type to LLVM type + llvm::PointerType *resultType = dyn_cast_or_null( + moduleTranslation.convertType(constantOp.getResult().getType())); + if (!resultType) + return constantOp.emitError("Expected a valid pointer type"); + + llvm::Value *result = nullptr; + + TypedAttr value = constantOp.getValue(); + if (auto nullAttr = dyn_cast(value)) { + // Create a null pointer constant + result = llvm::ConstantPointerNull::get(resultType); + } else if (auto addressAttr = dyn_cast(value)) { + // Create an integer constant and translate it to pointer + llvm::APInt addressValue = addressAttr.getValue(); + + // Determine the integer type width based on the target's pointer size + llvm::DataLayout dataLayout = + moduleTranslation.getLLVMModule()->getDataLayout(); + unsigned pointerSizeInBits = + dataLayout.getPointerSizeInBits(resultType->getAddressSpace()); + + // Extend or truncate the address value to match pointer size if needed + if (addressValue.getBitWidth() != pointerSizeInBits) { + if (addressValue.getBitWidth() > pointerSizeInBits) { + constantOp.emitWarning() + << "Truncating address value to fit pointer size"; + } + addressValue = addressValue.getBitWidth() < pointerSizeInBits + ? addressValue.zext(pointerSizeInBits) + : addressValue.trunc(pointerSizeInBits); + } + + // Create integer constant and translate to pointer + llvm::Type *intType = builder.getIntNTy(pointerSizeInBits); + llvm::Value *intValue = llvm::ConstantInt::get(intType, addressValue); + result = builder.CreateIntToPtr(intValue, resultType); + } else { + return constantOp.emitError("Unsupported constant attribute type"); + } + + moduleTranslation.mapValue(constantOp.getResult(), result); + return success(); +} + +/// Implementation of the dialect interface that translates operations belonging /// to the `ptr` dialect to LLVM IR. 
class PtrDialectLLVMIRTranslationInterface : public LLVMTranslationDialectInterface { @@ -314,30 +365,35 @@ class PtrDialectLLVMIRTranslationInterface LLVM::ModuleTranslation &moduleTranslation) const final { return llvm::TypeSwitch(op) + .Case([&](ConstantOp constantOp) { + return translateConstantOp(constantOp, builder, moduleTranslation); + }) .Case([&](PtrAddOp ptrAddOp) { - return convertPtrAddOp(ptrAddOp, builder, moduleTranslation); + return translatePtrAddOp(ptrAddOp, builder, moduleTranslation); }) .Case([&](LoadOp loadOp) { - return convertLoadOp(loadOp, builder, moduleTranslation); + return translateLoadOp(loadOp, builder, moduleTranslation); }) .Case([&](StoreOp storeOp) { - return convertStoreOp(storeOp, builder, moduleTranslation); + return translateStoreOp(storeOp, builder, moduleTranslation); }) .Case([&](TypeOffsetOp typeOffsetOp) { - return convertTypeOffsetOp(typeOffsetOp, builder, moduleTranslation); + return translateTypeOffsetOp(typeOffsetOp, builder, + moduleTranslation); }) .Case([&](GatherOp gatherOp) { - return convertGatherOp(gatherOp, builder, moduleTranslation); + return translateGatherOp(gatherOp, builder, moduleTranslation); }) .Case([&](MaskedLoadOp maskedLoadOp) { - return convertMaskedLoadOp(maskedLoadOp, builder, moduleTranslation); + return translateMaskedLoadOp(maskedLoadOp, builder, + moduleTranslation); }) .Case([&](MaskedStoreOp maskedStoreOp) { - return convertMaskedStoreOp(maskedStoreOp, builder, - moduleTranslation); + return translateMaskedStoreOp(maskedStoreOp, builder, + moduleTranslation); }) .Case([&](ScatterOp scatterOp) { - return convertScatterOp(scatterOp, builder, moduleTranslation); + return translateScatterOp(scatterOp, builder, moduleTranslation); }) .Default([&](Operation *op) { return op->emitError("Translation for operation '") diff --git a/mlir/test/Dialect/Ptr/ops.mlir b/mlir/test/Dialect/Ptr/ops.mlir index 51e5ac3ae691d..7b2254185f57c 100644 --- a/mlir/test/Dialect/Ptr/ops.mlir +++ 
b/mlir/test/Dialect/Ptr/ops.mlir @@ -114,7 +114,7 @@ func.func @masked_store_ops_tensor(%value: tensor<8xi64>, %ptr: !ptr.ptr<#ptr.ge } /// Test operations with LLVM address space -func.func @llvm_masked_ops(%ptr: !ptr.ptr<#llvm.address_space<3>>, %ptrs: vector<4x!ptr.ptr<#llvm.address_space<3>>>, +func.func @llvm_masked_ops(%ptr: !ptr.ptr<#llvm.address_space<3>>, %ptrs: vector<4x!ptr.ptr<#llvm.address_space<3>>>, %mask: vector<4xi1>, %value: vector<4xf32>, %passthrough: vector<4xf32>) -> vector<4xf32> { // Gather from shared memory (address space 3) %0 = ptr.gather %ptrs, %mask, %passthrough alignment = 4 : vector<4x!ptr.ptr<#llvm.address_space<3>>> -> vector<4xf32> @@ -189,3 +189,25 @@ func.func @ptr_add_tensor_base_scalar_offset(%ptrs: tensor<8x!ptr.ptr<#ptr.gener %res3 = ptr.ptr_add inbounds %ptrs, %offset : tensor<8x!ptr.ptr<#ptr.generic_space>>, i64 return %res : tensor<8x!ptr.ptr<#ptr.generic_space>> } + +/// Test constant operations with null pointer +func.func @constant_null_ops() -> (!ptr.ptr<#ptr.generic_space>, !ptr.ptr<#llvm.address_space<1>>) { + %null_generic = ptr.constant #ptr.null : !ptr.ptr<#ptr.generic_space> + %null_as1 = ptr.constant #ptr.null : !ptr.ptr<#llvm.address_space<1>> + return %null_generic, %null_as1 : !ptr.ptr<#ptr.generic_space>, !ptr.ptr<#llvm.address_space<1>> +} + +/// Test constant operations with address values +func.func @constant_address_ops() -> (!ptr.ptr<#ptr.generic_space>, !ptr.ptr<#llvm.address_space<1>>, !ptr.ptr<#llvm.address_space<3>>) { + %addr_0 = ptr.constant #ptr.address<0> : !ptr.ptr<#ptr.generic_space> + %addr_1000 = ptr.constant #ptr.address<0x1000> : !ptr.ptr<#llvm.address_space<1>> + %addr_deadbeef = ptr.constant #ptr.address<0xDEADBEEF> : !ptr.ptr<#llvm.address_space<3>> + return %addr_0, %addr_1000, %addr_deadbeef : !ptr.ptr<#ptr.generic_space>, !ptr.ptr<#llvm.address_space<1>>, !ptr.ptr<#llvm.address_space<3>> +} + +/// Test constant operations with large address values +func.func 
@constant_large_address_ops() -> (!ptr.ptr<#ptr.generic_space>, !ptr.ptr<#llvm.address_space<0>>) { + %addr_max32 = ptr.constant #ptr.address<0xFFFFFFFF> : !ptr.ptr<#ptr.generic_space> + %addr_large = ptr.constant #ptr.address<0x123456789ABCDEF0> : !ptr.ptr<#llvm.address_space<0>> + return %addr_max32, %addr_large : !ptr.ptr<#ptr.generic_space>, !ptr.ptr<#llvm.address_space<0>> +} diff --git a/mlir/test/Target/LLVMIR/ptr.mlir b/mlir/test/Target/LLVMIR/ptr.mlir index 9b99dd8e3a3eb..2fa794130ec52 100644 --- a/mlir/test/Target/LLVMIR/ptr.mlir +++ b/mlir/test/Target/LLVMIR/ptr.mlir @@ -41,10 +41,10 @@ llvm.func @type_offset(%arg0: !ptr.ptr<#llvm.address_space<0>>) -> !llvm.struct< %2 = ptr.type_offset i16 : i32 %3 = ptr.type_offset i32 : i32 %4 = llvm.mlir.poison : !llvm.struct<(i32, i32, i32, i32)> - %5 = llvm.insertvalue %0, %4[0] : !llvm.struct<(i32, i32, i32, i32)> - %6 = llvm.insertvalue %1, %5[1] : !llvm.struct<(i32, i32, i32, i32)> - %7 = llvm.insertvalue %2, %6[2] : !llvm.struct<(i32, i32, i32, i32)> - %8 = llvm.insertvalue %3, %7[3] : !llvm.struct<(i32, i32, i32, i32)> + %5 = llvm.insertvalue %0, %4[0] : !llvm.struct<(i32, i32, i32, i32)> + %6 = llvm.insertvalue %1, %5[1] : !llvm.struct<(i32, i32, i32, i32)> + %7 = llvm.insertvalue %2, %6[2] : !llvm.struct<(i32, i32, i32, i32)> + %8 = llvm.insertvalue %3, %7[3] : !llvm.struct<(i32, i32, i32, i32)> llvm.return %8 : !llvm.struct<(i32, i32, i32, i32)> } @@ -194,7 +194,7 @@ llvm.func @scatter_ops_i64(%value: vector<8xi64>, %ptrs: vector<8x!ptr.ptr<#llvm // CHECK-NEXT: call void @llvm.masked.store.v4f64.p3(<4 x double> %[[VALUE_F64]], ptr addrspace(3) %[[PTR_SHARED]], i32 8, <4 x i1> %[[MASK]]) // CHECK-NEXT: ret void // CHECK-NEXT: } -llvm.func @mixed_masked_ops_address_spaces(%ptr: !ptr.ptr<#llvm.address_space<3>>, %ptrs: vector<4x!ptr.ptr<#llvm.address_space<3>>>, +llvm.func @mixed_masked_ops_address_spaces(%ptr: !ptr.ptr<#llvm.address_space<3>>, %ptrs: vector<4x!ptr.ptr<#llvm.address_space<3>>>, %mask: 
vector<4xi1>, %value: vector<4xf64>, %passthrough: vector<4xf64>) { // Test with shared memory address space (3) and f64 elements %0 = ptr.gather %ptrs, %mask, %passthrough alignment = 8 : vector<4x!ptr.ptr<#llvm.address_space<3>>> -> vector<4xf64> @@ -255,3 +255,29 @@ llvm.func @llvm_ops_with_ptr_nvvm_values(%arg0: !llvm.ptr) { llvm.store %1, %arg0 : !ptr.ptr<#nvvm.memory_space>, !llvm.ptr llvm.return } + +// CHECK-LABEL: define { ptr, ptr addrspace(1), ptr addrspace(2) } @constant_address_op() { +// CHECK-NEXT: ret { ptr, ptr addrspace(1), ptr addrspace(2) } { ptr null, ptr addrspace(1) inttoptr (i64 4096 to ptr addrspace(1)), ptr addrspace(2) inttoptr (i64 3735928559 to ptr addrspace(2)) } +llvm.func @constant_address_op() -> + !llvm.struct<(!ptr.ptr<#llvm.address_space<0>>, + !ptr.ptr<#llvm.address_space<1>>, + !ptr.ptr<#llvm.address_space<2>>)> { + %0 = ptr.constant #ptr.null : !ptr.ptr<#llvm.address_space<0>> + %1 = ptr.constant #ptr.address<0x1000> : !ptr.ptr<#llvm.address_space<1>> + %2 = ptr.constant #ptr.address<3735928559> : !ptr.ptr<#llvm.address_space<2>> + %3 = llvm.mlir.poison : !llvm.struct<(!ptr.ptr<#llvm.address_space<0>>, !ptr.ptr<#llvm.address_space<1>>, !ptr.ptr<#llvm.address_space<2>>)> + %4 = llvm.insertvalue %0, %3[0] : !llvm.struct<(!ptr.ptr<#llvm.address_space<0>>, !ptr.ptr<#llvm.address_space<1>>, !ptr.ptr<#llvm.address_space<2>>)> + %5 = llvm.insertvalue %1, %4[1] : !llvm.struct<(!ptr.ptr<#llvm.address_space<0>>, !ptr.ptr<#llvm.address_space<1>>, !ptr.ptr<#llvm.address_space<2>>)> + %6 = llvm.insertvalue %2, %5[2] : !llvm.struct<(!ptr.ptr<#llvm.address_space<0>>, !ptr.ptr<#llvm.address_space<1>>, !ptr.ptr<#llvm.address_space<2>>)> + llvm.return %6 : !llvm.struct<(!ptr.ptr<#llvm.address_space<0>>, !ptr.ptr<#llvm.address_space<1>>, !ptr.ptr<#llvm.address_space<2>>)> +} + +// Test gep folders. 
+// CHECK-LABEL: define ptr @ptr_add_cst() { +// CHECK-NEXT: ret ptr inttoptr (i64 42 to ptr) +llvm.func @ptr_add_cst() -> !ptr.ptr<#llvm.address_space<0>> { + %off = llvm.mlir.constant(42 : i32) : i32 + %ptr = ptr.constant #ptr.null : !ptr.ptr<#llvm.address_space<0>> + %res = ptr.ptr_add %ptr, %off : !ptr.ptr<#llvm.address_space<0>>, i32 + llvm.return %res : !ptr.ptr<#llvm.address_space<0>> +} From 3e254ed9041530cf14594f33f6c87a37c8a96640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0=C3=A1rka=20Holendov=C3=A1?= <72979252+mlir-maiden@users.noreply.github.com> Date: Sun, 14 Sep 2025 12:30:31 -0400 Subject: [PATCH 257/734] [flang] Implement DSECNDS intrinsic (PGI extension) (#157573) Add support for DSECNDS, the double-precision variant of SECNDS. The implementation mirrors SECNDS, reusing the shared `SecndsImpl` runtime template. Includes: - Registration in intrinsics table - Lowering handler and runtime call wiring - Hook into shared SecndsImpl in extensions.cpp - Documentation in Intrinsics.md - Regression test dsecnds.f90 CC @eugeneepshteyn @klausler --------- Co-authored-by: Eugene Epshteyn --- flang-rt/lib/runtime/extensions.cpp | 13 +++++++- flang/docs/Intrinsics.md | 26 +++++++++++++++ .../flang/Optimizer/Builder/IntrinsicCall.h | 2 ++ .../Optimizer/Builder/Runtime/Intrinsics.h | 4 +++ flang/include/flang/Runtime/extensions.h | 4 +++ flang/lib/Evaluate/intrinsics.cpp | 4 +++ flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 21 ++++++++++++ .../Optimizer/Builder/Runtime/Intrinsics.cpp | 17 ++++++++++ flang/test/Lower/Intrinsics/dsecnds.f90 | 33 +++++++++++++++++++ 9 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/Intrinsics/dsecnds.f90 diff --git a/flang-rt/lib/runtime/extensions.cpp b/flang-rt/lib/runtime/extensions.cpp index be0eed6f49dc8..2c42597a56541 100644 --- a/flang-rt/lib/runtime/extensions.cpp +++ b/flang-rt/lib/runtime/extensions.cpp @@ -60,7 +60,7 @@ inline void CtimeBuffer(char *buffer, size_t bufsize, const 
time_t cur_time, namespace Fortran::runtime { -// Common implementation that could be used for either SECNDS() or SECNDSD(), +// Common implementation that could be used for either SECNDS() or DSECNDS(), // which are defined for float or double. template T SecndsImpl(T *refTime) { static_assert(std::is_same::value || std::is_same::value, @@ -381,6 +381,17 @@ float RTNAME(Secnds)(float *refTime, const char *sourceFile, int line) { return FORTRAN_PROCEDURE_NAME(secnds)(refTime); } +// PGI extension function DSECNDS(refTime) +double FORTRAN_PROCEDURE_NAME(dsecnds)(double *refTime) { + return SecndsImpl(refTime); +} + +double RTNAME(Dsecnds)(double *refTime, const char *sourceFile, int line) { + Terminator terminator{sourceFile, line}; + RUNTIME_CHECK(terminator, refTime != nullptr); + return FORTRAN_PROCEDURE_NAME(dsecnds)(refTime); +} + // GNU extension function TIME() std::int64_t RTNAME(time)() { return time(nullptr); } diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index 4b000877e7844..3314d1bcc64a2 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -1149,6 +1149,32 @@ PROGRAM example_secnds PRINT *, "Elapsed seconds:", elapsed END PROGRAM example_secnds ``` +### Non-Standard Intrinsics: DSECNDS +#### Description +`DSECNDS(refTime)` is the double precision variant of `SECNDS`. It returns the number of seconds +since midnight minus a user-supplied reference time `refTime`. Uses `REAL(KIND=8)` for higher precision. + +#### Usage and Info +- **Standard:** PGI extension +- **Class:** function +- **Syntax:** result = `DSECNDS(refTime)` +- **Arguments:** + +| ARGUMENT | INTENT | TYPE | KIND | Description | +|-----------|--------|---------------|-------------------------|------------------------------------------| +| `refTime` | `IN` | `REAL, scalar`| REAL(KIND=8), required | Reference time in seconds since midnight | + +- **Return Value:** REAL(KIND=8), scalar — seconds elapsed since `refTime`. 
+- **Purity:** Impure + +#### Example +```fortran +PROGRAM example_dsecnds + DOUBLE PRECISION :: refTime + refTime = 0.0D0 + PRINT '(F24.15)', DSECNDS(refTime) +END PROGRAM example_dsecnds +``` ### Non-standard Intrinsics: SECOND This intrinsic is an alias for `CPU_TIME`: supporting both a subroutine and a diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index d80ee9e861321..320f913858956 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -253,6 +253,8 @@ struct IntrinsicLibrary { mlir::Value genCosd(mlir::Type, llvm::ArrayRef); mlir::Value genCospi(mlir::Type, llvm::ArrayRef); void genDateAndTime(llvm::ArrayRef); + fir::ExtendedValue genDsecnds(mlir::Type resultType, + llvm::ArrayRef args); mlir::Value genDim(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genDotProduct(mlir::Type, llvm::ArrayRef); diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h index 548ee4bb65818..7a97172cfbb9a 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h @@ -44,6 +44,10 @@ void genDateAndTime(fir::FirOpBuilder &, mlir::Location, std::optional date, std::optional time, std::optional zone, mlir::Value values); + +mlir::Value genDsecnds(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value refTime); + void genEtime(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value values, mlir::Value time); diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h index 9a100cec9e6b9..7e4201f15171f 100644 --- a/flang/include/flang/Runtime/extensions.h +++ b/flang/include/flang/Runtime/extensions.h @@ -28,6 +28,10 @@ typedef std::uint32_t gid_t; extern "C" { +// PGI extension function DSECNDS(refTime) +double 
FORTRAN_PROCEDURE_NAME(dsecnds)(double *refTime); +double RTNAME(Dsecnds)(double *refTime, const char *sourceFile, int line); + // CALL FLUSH(n) antedates the Fortran 2003 FLUSH statement. void FORTRAN_PROCEDURE_NAME(flush)(const int &unit); diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index abe53c31210d0..c7f174f7989dd 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -462,6 +462,10 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"vector_b", AnyNumeric, Rank::vector}}, ResultNumeric, Rank::scalar, IntrinsicClass::transformationalFunction}, {"dprod", {{"x", DefaultReal}, {"y", DefaultReal}}, DoublePrecision}, + {"dsecnds", + {{"refTime", TypePattern{RealType, KindCode::exactKind, 8}, + Rank::scalar}}, + TypePattern{RealType, KindCode::exactKind, 8}, Rank::scalar}, {"dshiftl", {{"i", SameIntOrUnsigned}, {"j", SameIntOrUnsigned, Rank::elementalOrBOZ}, {"shift", AnyInt}}, diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index aa12dbff5935b..ce1376fd209cc 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -455,6 +455,10 @@ static constexpr IntrinsicHandler handlers[]{ {{{"vector_a", asBox}, {"vector_b", asBox}}}, /*isElemental=*/false}, {"dprod", &I::genDprod}, + {"dsecnds", + &I::genDsecnds, + {{{"refTime", asAddr}}}, + /*isElemental=*/false}, {"dshiftl", &I::genDshiftl}, {"dshiftr", &I::genDshiftr}, {"eoshift", @@ -4048,6 +4052,23 @@ mlir::Value IntrinsicLibrary::genDprod(mlir::Type resultType, return mlir::arith::MulFOp::create(builder, loc, a, b); } +// DSECNDS +// Double precision variant of SECNDS (PGI extension) +fir::ExtendedValue +IntrinsicLibrary::genDsecnds(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 1 && "DSECNDS expects one argument"); + + mlir::Value refTime = fir::getBase(args[0]); + + if (!refTime) + 
fir::emitFatalError(loc, "expected REFERENCE TIME parameter"); + + mlir::Value result = fir::runtime::genDsecnds(builder, loc, refTime); + + return builder.createConvert(loc, resultType, result); +} + // DSHIFTL mlir::Value IntrinsicLibrary::genDshiftl(mlir::Type resultType, llvm::ArrayRef args) { diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp index dc61903ddd369..110b1b20898c7 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp @@ -106,6 +106,23 @@ void fir::runtime::genDateAndTime(fir::FirOpBuilder &builder, fir::CallOp::create(builder, loc, callee, args); } +mlir::Value fir::runtime::genDsecnds(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value refTime) { + auto runtimeFunc = + fir::runtime::getRuntimeFunc(loc, builder); + + mlir::FunctionType runtimeFuncTy = runtimeFunc.getFunctionType(); + + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, runtimeFuncTy.getInput(2)); + + llvm::SmallVector args = {refTime, sourceFile, sourceLine}; + args = fir::runtime::createArguments(builder, loc, runtimeFuncTy, args); + + return fir::CallOp::create(builder, loc, runtimeFunc, args).getResult(0); +} + void fir::runtime::genEtime(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value values, mlir::Value time) { auto runtimeFunc = fir::runtime::getRuntimeFunc(loc, builder); diff --git a/flang/test/Lower/Intrinsics/dsecnds.f90 b/flang/test/Lower/Intrinsics/dsecnds.f90 new file mode 100644 index 0000000000000..03814ff60bd80 --- /dev/null +++ b/flang/test/Lower/Intrinsics/dsecnds.f90 @@ -0,0 +1,33 @@ +! RUN: bbc -emit-hlfir %s -o - | FileCheck %s + +! CHECK-LABEL: func.func @_QPuse_dsecnds( +! 
CHECK-SAME: %[[arg0:.*]]: !fir.ref +function use_dsecnds(refTime) result(elapsed) + double precision :: refTime, elapsed + elapsed = dsecnds(refTime) +end function + +! The argument is lowered with hlfir.declare, which returns two results. +! Capture it here to check that the correct SSA value (%...#0) +! is passed to the runtime call later +! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[arg0]] dummy_scope + +! The file name and source line are also lowered and passed as runtime arguments +! Capture the constant line number and convert the file name to i8*. +! CHECK: %[[STRADDR:.*]] = fir.address_of( +! CHECK: %[[LINE:.*]] = arith.constant {{.*}} : i32 +! CHECK: %[[FNAME8:.*]] = fir.convert %[[STRADDR]] : (!fir.ref>) -> !fir.ref + +! Verify the runtime call is made with: +! - the declared refTime value (%[[DECL]]#0) +! - the converted filename +! - the source line constant +! CHECK: %[[CALL:.*]] = fir.call @_FortranADsecnds(%[[DECL]]#0, %[[FNAME8]], %[[LINE]]) {{.*}} : (!fir.ref, !fir.ref, i32) -> f64 + +! Ensure there is no illegal conversion of a value result into a reference +! CHECK-NOT: fir.convert {{.*}} : (f64) -> !fir.ref + +! Confirm the function result is returned as a plain f64 +! 
CHECK: return {{.*}} : f64 + + From 8007022caf47372abc73865b3b90e888c23983ad Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 21 Aug 2025 05:09:49 -0700 Subject: [PATCH 258/734] [MLIR] Apply clang-tidy fixes for performance-unnecessary-copy-initialization in Linalg.cpp (NFC) --- mlir/lib/CAPI/Dialect/Linalg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/CAPI/Dialect/Linalg.cpp b/mlir/lib/CAPI/Dialect/Linalg.cpp index 21db18dfd47ed..5c2a65d2c4c8a 100644 --- a/mlir/lib/CAPI/Dialect/Linalg.cpp +++ b/mlir/lib/CAPI/Dialect/Linalg.cpp @@ -59,7 +59,7 @@ mlirLinalgInferContractionDimensions(MlirOperation op) { if (failed(maybeDims)) return result; - linalg::ContractionDimensions contractionDims = *maybeDims; + const linalg::ContractionDimensions &contractionDims = *maybeDims; MLIRContext *ctx = linalgOp.getContext(); auto toAttr = [&ctx](const SmallVector &vals) -> MlirAttribute { @@ -95,7 +95,7 @@ mlirLinalgInferConvolutionDimensions(MlirOperation op) { if (failed(maybeDims)) return result; - linalg::ConvolutionDimensions dims = *maybeDims; + const linalg::ConvolutionDimensions &dims = *maybeDims; MLIRContext *ctx = linalgOp.getContext(); auto toI32Attr = From d7bf2bf85f84977e4504322b61af3d5def211c51 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Sun, 14 Sep 2025 12:57:43 -0400 Subject: [PATCH 259/734] [NFC][mlir][ptr] Clarify pointer dialect semantics (#158484) This patch adds the following description to the pointer dialect: ``` The pointer dialect provides types and operations for representing and interacting with pointer values in MLIR, such as loading and storing values from/to memory addresses. The dialect's main type is an opaque pointer (`ptr`) that can be parameterized by a memory space. This type represents a handle to an object in memory, or target-dependent values like `nullptr`. Further, the dialect assumes that the minimum addressable unit by a pointer is a byte. 
However, the dialect does not make assumptions about the size of a byte, which is considered a target-specific property. ``` --- mlir/include/mlir/Dialect/Ptr/IR/PtrDialect.td | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mlir/include/mlir/Dialect/Ptr/IR/PtrDialect.td b/mlir/include/mlir/Dialect/Ptr/IR/PtrDialect.td index 7407d74ce3a87..c98df5775195a 100644 --- a/mlir/include/mlir/Dialect/Ptr/IR/PtrDialect.td +++ b/mlir/include/mlir/Dialect/Ptr/IR/PtrDialect.td @@ -21,6 +21,18 @@ include "mlir/IR/OpBase.td" def Ptr_Dialect : Dialect { let name = "ptr"; let summary = "Pointer dialect"; + let description = [{ + The pointer dialect provides types and operations for representing and + interacting with pointer values in MLIR, such as loading and storing values + from/to memory addresses. + + The dialect's main type is an opaque pointer (`ptr`) that can be + parameterized by a memory space. This type represents a handle to an object + in memory, or target-dependent values like `nullptr`. Further, the dialect + assumes that the minimum addressable unit by a pointer is a byte. However, + the dialect does not make assumptions about the size of a byte, which is + considered a target-specific property. + }]; let cppNamespace = "::mlir::ptr"; let useDefaultTypePrinterParser = 1; let useDefaultAttributePrinterParser = 1; From b01cddee0e69bd283a0f1830f24fae326371f1de Mon Sep 17 00:00:00 2001 From: Tomohiro Kashiwada Date: Mon, 15 Sep 2025 02:09:46 +0900 Subject: [PATCH 260/734] [Clang][Cygwin] Cygwin x86_64 should accept __stdcall (#158385) Cygwin should support calling convention attributes `__cdecl`, `__stdcall`, `__thiscall`, and `__fastcall`, even though they have no effect in x86_64, as done in MinGW. 
Originally reported in https://cygwin.com/pipermail/cygwin/2025-September/258782.html --------- Co-authored-by: Jeremy Drake --- clang/lib/Basic/Targets/X86.h | 23 ++++++++++++ clang/test/CodeGen/X86/cygwin-varargs.c | 35 ------------------- clang/test/CodeGen/calling-conv-ignored.c | 2 ++ clang/test/CodeGen/ms_abi.c | 2 ++ clang/test/CodeGen/sysv_abi.c | 4 +++ clang/test/DebugInfo/Generic/cc.c | 6 ++-- .../x64-windows-calling-convention-handling.c | 4 ++- clang/test/Sema/MicrosoftCompatibility-x64.c | 4 ++- 8 files changed, 41 insertions(+), 39 deletions(-) delete mode 100644 clang/test/CodeGen/X86/cygwin-varargs.c diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index d159a7906854c..be3a473174370 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -999,6 +999,29 @@ class LLVM_LIBRARY_VISIBILITY CygwinX86_64TargetInfo : public X86_64TargetInfo { Builder.defineMacro("_GNU_SOURCE"); } + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { + switch (CC) { + case CC_X86StdCall: + case CC_X86ThisCall: + case CC_X86FastCall: + return CCCR_Ignore; + case CC_C: + case CC_X86VectorCall: + case CC_IntelOclBicc: + case CC_PreserveMost: + case CC_PreserveAll: + case CC_PreserveNone: + case CC_X86_64SysV: + case CC_Swift: + case CC_SwiftAsync: + case CC_X86RegCall: + case CC_DeviceKernel: + return CCCR_OK; + default: + return CCCR_Warning; + } + } + BuiltinVaListKind getBuiltinVaListKind() const override { return TargetInfo::CharPtrBuiltinVaList; } diff --git a/clang/test/CodeGen/X86/cygwin-varargs.c b/clang/test/CodeGen/X86/cygwin-varargs.c deleted file mode 100644 index 4eea7d64bcb35..0000000000000 --- a/clang/test/CodeGen/X86/cygwin-varargs.c +++ /dev/null @@ -1,35 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm < %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -emit-llvm < %s | FileCheck %s - -struct foo { - int x; - float y; - char z; -}; -// CHECK: 
%[[STRUCT_FOO:.*]] = type { i32, float, i8 } - -void f(int a, ...) { - // CHECK-LABEL: define dso_local void @f - __builtin_va_list ap; - __builtin_va_start(ap, a); - // CHECK: %[[AP:.*]] = alloca ptr - // CHECK: call void @llvm.va_start - int b = __builtin_va_arg(ap, int); - // CHECK: %[[AP_CUR:.*]] = load ptr, ptr %[[AP]] - // CHECK-NEXT: %[[AP_NEXT:.*]] = getelementptr inbounds i8, ptr %[[AP_CUR]], i64 8 - // CHECK-NEXT: store ptr %[[AP_NEXT]], ptr %[[AP]] - double _Complex c = __builtin_va_arg(ap, double _Complex); - // CHECK: %[[AP_CUR2:.*]] = load ptr, ptr %[[AP]] - // CHECK-NEXT: %[[AP_NEXT2:.*]] = getelementptr inbounds i8, ptr %[[AP_CUR2]], i64 8 - // CHECK-NEXT: store ptr %[[AP_NEXT2]], ptr %[[AP]] - // CHECK-NEXT: load ptr, ptr %[[AP_CUR2]] - struct foo d = __builtin_va_arg(ap, struct foo); - // CHECK: %[[AP_CUR3:.*]] = load ptr, ptr %[[AP]] - // CHECK-NEXT: %[[AP_NEXT3:.*]] = getelementptr inbounds i8, ptr %[[AP_CUR3]], i64 8 - // CHECK-NEXT: store ptr %[[AP_NEXT3]], ptr %[[AP]] - __builtin_va_list ap2; - __builtin_va_copy(ap2, ap); - // CHECK: call void @llvm.va_copy - __builtin_va_end(ap); - // CHECK: call void @llvm.va_end -} diff --git a/clang/test/CodeGen/calling-conv-ignored.c b/clang/test/CodeGen/calling-conv-ignored.c index 9c47f641eaacb..5dbc7e4084c88 100644 --- a/clang/test/CodeGen/calling-conv-ignored.c +++ b/clang/test/CodeGen/calling-conv-ignored.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -o - %s | FileCheck %s --check-prefix=X86 // RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -o - %s | FileCheck %s --check-prefix=X64 +// RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=X64 +// RUN: %clang_cc1 -triple x86_64-cygwin -emit-llvm -o - %s | FileCheck %s --check-prefix=X64 // RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -o - %s -fdefault-calling-conv=vectorcall | FileCheck %s --check-prefix=X86-VEC // RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm 
-o - %s -fdefault-calling-conv=vectorcall | FileCheck %s --check-prefix=X64-VEC diff --git a/clang/test/CodeGen/ms_abi.c b/clang/test/CodeGen/ms_abi.c index 5d58c9816da78..2047febabdb11 100644 --- a/clang/test/CodeGen/ms_abi.c +++ b/clang/test/CodeGen/ms_abi.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 -emit-llvm < %s | FileCheck -check-prefix=FREEBSD %s // RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s +// RUN: %clang_cc1 -triple x86_64-mingw -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s +// RUN: %clang_cc1 -triple x86_64-cygwin -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s // RUN: %clang_cc1 -triple x86_64-uefi -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s struct foo { diff --git a/clang/test/CodeGen/sysv_abi.c b/clang/test/CodeGen/sysv_abi.c index 29ea819c2aa26..a66ecc6e26242 100644 --- a/clang/test/CodeGen/sysv_abi.c +++ b/clang/test/CodeGen/sysv_abi.c @@ -1,7 +1,11 @@ // RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX +// RUN: %clang_cc1 -triple x86_64-mingw -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX +// RUN: %clang_cc1 -triple x86_64-cygwin -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX // RUN: %clang_cc1 -triple x86_64-uefi -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX // RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,NOAVX +// RUN: %clang_cc1 -triple x86_64-mingw -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,NOAVX +// RUN: %clang_cc1 -triple x86_64-cygwin -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,NOAVX // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm < %s | FileCheck %s 
--check-prefixes=CHECK,NOAVX // RUN: %clang_cc1 -triple x86_64-uefi -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,NOAVX diff --git a/clang/test/DebugInfo/Generic/cc.c b/clang/test/DebugInfo/Generic/cc.c index 2bfb1c28e9353..e430e4c8ed87b 100644 --- a/clang/test/DebugInfo/Generic/cc.c +++ b/clang/test/DebugInfo/Generic/cc.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s --check-prefix=LINUX -// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s --check-prefix=WINDOWS +// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s --check-prefix=WINDOWS +// RUN: %clang_cc1 -triple x86_64-unknown-windows-gnu -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s --check-prefix=WINDOWS +// RUN: %clang_cc1 -triple x86_64-unknown-windows-cygnus -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s --check-prefix=WINDOWS // RUN: %clang_cc1 -triple i386-pc-linux-gnu -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s --check-prefix=LINUX32 // RUN: %clang_cc1 -triple armv7--linux-gnueabihf -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s --check-prefix=ARM @@ -77,7 +79,7 @@ __attribute__((intel_ocl_bicc)) int add_inteloclbicc(int a, int b) { } #endif -#ifdef _WIN64 +#if defined(_WIN64) || defined(__CYGWIN__) // WINDOWS: !DISubprogram({{.*}}"add_sysvabi", {{.*}}type: ![[FTY:[0-9]+]] // WINDOWS: ![[FTY]] = !DISubroutineType({{.*}}cc: DW_CC_LLVM_X86_64SysV, __attribute__((sysv_abi)) int add_sysvabi(int a, int b) { diff --git a/clang/test/Parser/x64-windows-calling-convention-handling.c b/clang/test/Parser/x64-windows-calling-convention-handling.c index c027663414829..224931c4eb91d 100644 --- a/clang/test/Parser/x64-windows-calling-convention-handling.c +++ b/clang/test/Parser/x64-windows-calling-convention-handling.c @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -triple 
x86_64-windows -fms-compatibility -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-windows -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-mingw -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-cygwin -fsyntax-only -verify %s int __cdecl cdecl(int a, int b, int c, int d) { // expected-no-diagnostics return a + b + c + d; diff --git a/clang/test/Sema/MicrosoftCompatibility-x64.c b/clang/test/Sema/MicrosoftCompatibility-x64.c index 7d1f64996eb3c..a422b549dcc00 100644 --- a/clang/test/Sema/MicrosoftCompatibility-x64.c +++ b/clang/test/Sema/MicrosoftCompatibility-x64.c @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 %s -Wmicrosoft -verify -fms-compatibility -triple x86_64-pc-win32 +// RUN: %clang_cc1 %s -Wmicrosoft -verify -triple x86_64-pc-win32 +// RUN: %clang_cc1 %s -Wmicrosoft -verify -triple x86_64-w64-mingw32 +// RUN: %clang_cc1 %s -Wmicrosoft -verify -triple x86_64-pc-cygwin // None of these should warn. stdcall is treated as equivalent to cdecl on // x64. From 31c4eb032f347868c5ba929ca42004e03dbded00 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 14 Sep 2025 10:55:52 -0700 Subject: [PATCH 261/734] [Github] Update actions/download-artifact to v5 (#158466) Some of the older versions have security issues. Upgrade to v5 to remedy that and get ahead of the v5 upgrade cycle rather than updating to a later v4.x release. 
Updated mechanically using the following command: find .github -exec sed -i -e "s/actions\/download-artifact@.*/actions\/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0/g" {} \; --- .github/workflows/build-ci-container-windows.yml | 2 +- .github/workflows/build-ci-container.yml | 2 +- .github/workflows/build-metrics-container.yml | 2 +- .github/workflows/libclang-abi-tests.yml | 4 ++-- .github/workflows/llvm-tests.yml | 6 +++--- .github/workflows/release-binaries-setup-stage/action.yml | 2 +- .github/workflows/release-binaries.yml | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-ci-container-windows.yml b/.github/workflows/build-ci-container-windows.yml index 55a269c001c2b..167e7cf06b3b2 100644 --- a/.github/workflows/build-ci-container-windows.yml +++ b/.github/workflows/build-ci-container-windows.yml @@ -61,7 +61,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: Download container - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: container - name: Push Container diff --git a/.github/workflows/build-ci-container.yml b/.github/workflows/build-ci-container.yml index 3e91c49a51d19..67f35fd30701f 100644 --- a/.github/workflows/build-ci-container.yml +++ b/.github/workflows/build-ci-container.yml @@ -88,7 +88,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: Download container - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 - name: Push Container run: | diff --git a/.github/workflows/build-metrics-container.yml b/.github/workflows/build-metrics-container.yml index 265fd73cc0bb7..cadcaa9a42e8f 100644 --- a/.github/workflows/build-metrics-container.yml +++ b/.github/workflows/build-metrics-container.yml @@ -66,7 +66,7 @@ 
jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: Download Container - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: container - name: Push Container diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index 3836cc56a7c22..c159fb584fdcd 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -144,12 +144,12 @@ jobs: - abi-dump steps: - name: Download baseline - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: build-baseline path: build-baseline - name: Download latest - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: build-latest path: build-latest diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 52b486e7e62fc..b6c30b342bbe8 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -148,17 +148,17 @@ jobs: - abi-dump steps: - name: Download baseline - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: build-baseline path: build-baseline - name: Download latest - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: build-latest path: build-latest - name: Download symbol list - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # 4.1.8 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: symbol-list path: 
symbol-list diff --git a/.github/workflows/release-binaries-setup-stage/action.yml b/.github/workflows/release-binaries-setup-stage/action.yml index f5e5db27e6595..8f45e22886b6e 100644 --- a/.github/workflows/release-binaries-setup-stage/action.yml +++ b/.github/workflows/release-binaries-setup-stage/action.yml @@ -44,7 +44,7 @@ runs: - name: Download Previous Stage Artifact if: ${{ inputs.previous-artifact }} id: download - uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: pattern: ${{ runner.os }}-${{ runner.arch }}-${{ inputs.previous-artifact }}-* merge-multiple: true diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index 116bdfb3929d3..8f422a0147748 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -294,7 +294,7 @@ jobs: sparse-checkout-cone-mode: false - name: 'Download artifact' - uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: pattern: '*-release-binary' merge-multiple: true From 3f26f1aed07f9671891b64594d9a7bd7c3df1f7c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 11:02:56 -0700 Subject: [PATCH 262/734] [ADT] Reduce boilerplate in DenseSet (NFC) (#158456) The class definitions of DenseSet and SmallDenseSet contain a lot of boilerplate code, repeating the lengthy base class name twice in each definition. This patch simplifies the two definitions by making them type aliases. 
--- llvm/include/llvm/ADT/DenseSet.h | 36 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h index 281d4d1c78cc0..60ad9b2eb7762 100644 --- a/llvm/include/llvm/ADT/DenseSet.h +++ b/llvm/include/llvm/ADT/DenseSet.h @@ -250,20 +250,24 @@ bool operator!=(const DenseSetImpl &LHS, return !(LHS == RHS); } +template +using DenseSet = DenseSetImpl< + ValueT, DenseMap>, + ValueInfoT>; + +template +using SmallDenseSet = + DenseSetImpl>, + ValueInfoT>; + } // end namespace detail /// Implements a dense probed hash-table based set. template > -class DenseSet : public detail::DenseSetImpl< - ValueT, - DenseMap>, - ValueInfoT> { - using BaseT = - detail::DenseSetImpl>, - ValueInfoT>; +class DenseSet : public detail::DenseSet { + using BaseT = detail::DenseSet; public: using BaseT::BaseT; @@ -274,16 +278,8 @@ class DenseSet : public detail::DenseSetImpl< template > class SmallDenseSet - : public detail::DenseSetImpl< - ValueT, - SmallDenseMap>, - ValueInfoT> { - using BaseT = detail::DenseSetImpl< - ValueT, - SmallDenseMap>, - ValueInfoT>; + : public detail::SmallDenseSet { + using BaseT = detail::SmallDenseSet; public: using BaseT::BaseT; From ad8d0a13397ba4d4174363120e9c92339ace2d97 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 11:03:03 -0700 Subject: [PATCH 263/734] [ADT] Fix the initial size calculation of SmallDenseMap (#158458) The initial size calculation of SmallDenseMap is strange in several ways: - SmallDenseMap(unsigned) seems to want to take the number of initial buckets as far as I can tell from the variable name NumInitBuckets. 
In contrast, DenseMap(unsigned) seems to want to take the number of initial entries as far as I can tell from the comment: /// Create a DenseMap with an optional \p InitialReserve that guarantee that /// this number of elements can be inserted in the map without grow() - SmallDenseMap(unsigned) uses llvm::bit_ceil to obtain a power of two. SmallDenseMap(I, E) uses NextPowerOf2 to obtain a power of two. - Presumably, the init() call is to ensure that we won't call grow() while populating the initial elements [I, E). However, NextPowerOf2(std::distance(I, E)) does not ensure that a rehash won't happen. For example, if the number of initial elements is 50, we need 128 buckets, but NextPowerOf2(std::distance(I, E)) would return 64. This patch fixes all these inconsistencies by teaching SmallDenseMap::init to call BaseT::getMinBucketToReserveForEntries just like DenseMap::init. With this patch, all constructors of SmallDenseMap are textually identical to their respective counterparts in DenseMap. 
--- llvm/include/llvm/ADT/DenseMap.h | 11 +++-- llvm/unittests/ADT/DenseMapTest.cpp | 69 +++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 23b672eaf8b47..b478ce21d79e6 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -887,10 +887,8 @@ class SmallDenseMap AlignedCharArrayUnion storage; public: - explicit SmallDenseMap(unsigned NumInitBuckets = 0) { - if (NumInitBuckets > InlineBuckets) - NumInitBuckets = llvm::bit_ceil(NumInitBuckets); - init(NumInitBuckets); + explicit SmallDenseMap(unsigned NumElementsToReservre = 0) { + init(NumElementsToReservre); } SmallDenseMap(const SmallDenseMap &other) : BaseT() { @@ -905,7 +903,7 @@ class SmallDenseMap template SmallDenseMap(const InputIt &I, const InputIt &E) { - init(NextPowerOf2(std::distance(I, E))); + init(std::distance(I, E)); this->insert(I, E); } @@ -1017,7 +1015,8 @@ class SmallDenseMap this->BaseT::copyFrom(other); } - void init(unsigned InitBuckets) { + void init(unsigned InitNumEntries) { + auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries); Small = true; if (InitBuckets > InlineBuckets) { Small = false; diff --git a/llvm/unittests/ADT/DenseMapTest.cpp b/llvm/unittests/ADT/DenseMapTest.cpp index 785ab16271d93..50e9c6e138ef1 100644 --- a/llvm/unittests/ADT/DenseMapTest.cpp +++ b/llvm/unittests/ADT/DenseMapTest.cpp @@ -962,4 +962,73 @@ TEST(DenseMapCustomTest, PairPrinting) { EXPECT_EQ(R"({ (1, "one"), (2, "two") })", ::testing::PrintToString(Map)); } +TEST(DenseMapCustomTest, InitSize) { + constexpr unsigned ElemSize = sizeof(std::pair); + + { + DenseMap Map; + EXPECT_EQ(ElemSize * 0U, Map.getMemorySize()); + } + { + DenseMap Map(0); + EXPECT_EQ(ElemSize * 0U, Map.getMemorySize()); + } + { + DenseMap Map(1); + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + { + DenseMap Map(2); + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + 
{ + DenseMap Map(3); + EXPECT_EQ(ElemSize * 8U, Map.getMemorySize()); + } + { + int A, B; + DenseMap Map = {{&A, 1}, {&B, 2}}; + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + { + int A, B, C; + DenseMap Map = {{&A, 1}, {&B, 2}, {&C, 3}}; + EXPECT_EQ(ElemSize * 8U, Map.getMemorySize()); + } +} + +TEST(SmallDenseMapCustomTest, InitSize) { + constexpr unsigned ElemSize = sizeof(std::pair); + { + SmallDenseMap Map; + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + { + SmallDenseMap Map(0); + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + { + SmallDenseMap Map(1); + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + { + SmallDenseMap Map(2); + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + { + SmallDenseMap Map(3); + EXPECT_EQ(ElemSize * 8U, Map.getMemorySize()); + } + { + int A, B; + SmallDenseMap Map = {{&A, 1}, {&B, 2}}; + EXPECT_EQ(ElemSize * 4U, Map.getMemorySize()); + } + { + int A, B, C; + SmallDenseMap Map = {{&A, 1}, {&B, 2}, {&C, 3}}; + EXPECT_EQ(ElemSize * 8U, Map.getMemorySize()); + } +} + } // namespace From 0cab8da19ca61147027d66fb8a669a1d9ad521a5 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 14 Sep 2025 14:16:37 -0400 Subject: [PATCH 264/734] [gn] port 698f39bc1959 --- .../gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn index 94b6de7af6044..447a67af6be7b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/Disassembler/BUILD.gn @@ -5,7 +5,6 @@ tablegen("RISCVGenDisassemblerTables") { args = [ "-gen-disassembler", "-specialize-decoders-per-bitwidth", - "-ignore-non-decodable-operands", ] td_file = "../RISCV.td" } From d35ce3369809b8212eb44908bd9c90d483ee519d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 14 Sep 2025 14:18:13 -0400 Subject: [PATCH 
265/734] [gn] port 5a0be9b2a47d4 --- .../secondary/llvm/lib/Target/Hexagon/Disassembler/BUILD.gn | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/Disassembler/BUILD.gn index 2d21060086036..35a5d86c7e135 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/Disassembler/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/Disassembler/BUILD.gn @@ -2,10 +2,7 @@ import("//llvm/utils/TableGen/tablegen.gni") tablegen("HexagonGenDisassemblerTables") { visibility = [ ":Disassembler" ] - args = [ - "-gen-disassembler", - "-ignore-non-decodable-operands", - ] + args = [ "-gen-disassembler" ] td_file = "../Hexagon.td" } From 685c627b0c810252dbc021f1b6df213b1d983938 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 14 Sep 2025 14:19:00 -0400 Subject: [PATCH 266/734] [gn] port 69f1aebf2017f43 --- .../gn/secondary/llvm/lib/Target/BPF/Disassembler/BUILD.gn | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/Disassembler/BUILD.gn index 924317d20eee6..f47fe7ac28cee 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/Disassembler/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/Disassembler/BUILD.gn @@ -2,10 +2,7 @@ import("//llvm/utils/TableGen/tablegen.gni") tablegen("BPFGenDisassemblerTables") { visibility = [ ":Disassembler" ] - args = [ - "-gen-disassembler", - "-ignore-non-decodable-operands", - ] + args = [ "-gen-disassembler" ] td_file = "../BPF.td" } From 5719fb8c20ffdaa40411bb7ac41ef719122d22f6 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 14 Sep 2025 14:19:41 -0400 Subject: [PATCH 267/734] [gn] port 69d0c3e44ff --- .../gn/secondary/llvm/lib/Target/AVR/Disassembler/BUILD.gn | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/llvm/utils/gn/secondary/llvm/lib/Target/AVR/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AVR/Disassembler/BUILD.gn index bbae270d24c46..dded556b786fb 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AVR/Disassembler/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AVR/Disassembler/BUILD.gn @@ -2,10 +2,7 @@ import("//llvm/utils/TableGen/tablegen.gni") tablegen("AVRGenDisassemblerTables") { visibility = [ ":Disassembler" ] - args = [ - "-gen-disassembler", - "-ignore-non-decodable-operands", - ] + args = [ "-gen-disassembler" ] td_file = "../AVR.td" } From ee3a4f4c94744d296797d1527382a9f060fc241a Mon Sep 17 00:00:00 2001 From: Mikhail Gudim Date: Sun, 14 Sep 2025 15:29:28 -0400 Subject: [PATCH 268/734] [SLPVectorizer] Test -1 stride loads. (#158358) Add a test to generate -1 stride load and flags to force this behaviour. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +++- .../RISCV/reversed-strided-load.ll | 85 +++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 75cace77ec534..7ca43efb47c6e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -198,6 +198,16 @@ static cl::opt MaxProfitableLoadStride( "slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable.")); +static cl::opt + DisableTreeReorder("slp-disable-tree-reorder", cl::init(false), cl::Hidden, + cl::desc("Disable tree reordering even if it is " + "profitable. Used for testing only.")); + +static cl::opt + ForceStridedLoads("slp-force-strided-loads", cl::init(false), cl::Hidden, + cl::desc("Generate strided loads even if they are not " + "profitable. 
Used for testing only.")); + static cl::opt ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")); @@ -7770,6 +7780,9 @@ static void combineOrders(MutableArrayRef Order, } bool BoUpSLP::isProfitableToReorder() const { + if (DisableTreeReorder) + return false; + constexpr unsigned TinyVF = 2; constexpr unsigned TinyTree = 10; constexpr unsigned PhiOpsLimit = 12; @@ -13027,7 +13040,7 @@ void BoUpSLP::transformNodes() { InstructionCost StridedCost = TTI->getStridedMemoryOpCost( Instruction::Load, VecTy, BaseLI->getPointerOperand(), /*VariableMask=*/false, CommonAlignment, CostKind, BaseLI); - if (StridedCost < OriginalVecCost) + if (StridedCost < OriginalVecCost || ForceStridedLoads) // Strided load is more profitable than consecutive load + reverse - // transform the node to strided load. E.State = TreeEntry::StridedVectorize; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll new file mode 100644 index 0000000000000..77d3ac1fb2322 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=riscv64 -mattr=+m,+v \ +; RUN: -passes=slp-vectorizer \ +; RUN: -slp-disable-tree-reorder=true -slp-force-strided-loads=true \ +; RUN: -S < %s | FileCheck %s + +define void @const_stride_reversed(ptr %pl, ptr %ps) { +; CHECK-LABEL: define void @const_stride_reversed( +; CHECK-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[GEP_L15:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 15 +; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr align 16 [[GEP_L15]], i64 -1, <16 x i1> splat (i1 true), i32 16) +; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr 
[[GEP_S0]], align 16 +; CHECK-NEXT: ret void +; + %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0 + %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1 + %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2 + %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3 + %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 4 + %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 5 + %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 6 + %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 7 + %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 8 + %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 9 + %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 10 + %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 11 + %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 12 + %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 13 + %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 14 + %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 15 + + %load0 = load i8, ptr %gep_l0 , align 16 + %load1 = load i8, ptr %gep_l1 , align 16 + %load2 = load i8, ptr %gep_l2 , align 16 + %load3 = load i8, ptr %gep_l3 , align 16 + %load4 = load i8, ptr %gep_l4 , align 16 + %load5 = load i8, ptr %gep_l5 , align 16 + %load6 = load i8, ptr %gep_l6 , align 16 + %load7 = load i8, ptr %gep_l7 , align 16 + %load8 = load i8, ptr %gep_l8 , align 16 + %load9 = load i8, ptr %gep_l9 , align 16 + %load10 = load i8, ptr %gep_l10, align 16 + %load11 = load i8, ptr %gep_l11, align 16 + %load12 = load i8, ptr %gep_l12, align 16 + %load13 = load i8, ptr %gep_l13, align 16 + %load14 = load i8, ptr %gep_l14, align 16 + %load15 = load i8, ptr %gep_l15, align 16 + + %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0 + %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1 + %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2 + %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3 + %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4 + %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5 + %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6 
+ %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7 + %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8 + %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9 + %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10 + %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11 + %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12 + %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13 + %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14 + %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15 + + store i8 %load0, ptr %gep_s15, align 16 + store i8 %load1, ptr %gep_s14, align 16 + store i8 %load2, ptr %gep_s13, align 16 + store i8 %load3, ptr %gep_s12, align 16 + store i8 %load4, ptr %gep_s11, align 16 + store i8 %load5, ptr %gep_s10, align 16 + store i8 %load6, ptr %gep_s9, align 16 + store i8 %load7, ptr %gep_s8, align 16 + store i8 %load8, ptr %gep_s7, align 16 + store i8 %load9, ptr %gep_s6, align 16 + store i8 %load10, ptr %gep_s5, align 16 + store i8 %load11, ptr %gep_s4, align 16 + store i8 %load12, ptr %gep_s3, align 16 + store i8 %load13, ptr %gep_s2, align 16 + store i8 %load14, ptr %gep_s1, align 16 + store i8 %load15, ptr %gep_s0, align 16 + + ret void +} From b30c29c89381f87562edc34c7813cbd310d1d4b0 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Sun, 14 Sep 2025 12:58:48 -0700 Subject: [PATCH 269/734] Revert "[BasicBlockUtils] Handle funclets when detaching EH pad blocks" (#158364) Reverts llvm/llvm-project#157363 Causes crashes, see https://github.com/llvm/llvm-project/pull/157363#issuecomment-3286783238 --- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 44 +---- .../unreachable-multi-basic-block-funclet.ll | 169 ------------------ 2 files changed, 1 insertion(+), 212 deletions(-) delete mode 100644 llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index d2391e166f942..cad0b4c12b54e 100644 --- 
a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -58,19 +58,6 @@ static cl::opt MaxDeoptOrUnreachableSuccessorCheckDepth( "is followed by a block that either has a terminating " "deoptimizing call or is terminated with an unreachable")); -static void replaceFuncletPadsRetWithUnreachable(Instruction &I) { - assert(isa(I) && "Instruction must be a funclet pad!"); - for (User *User : make_early_inc_range(I.users())) { - Instruction *ReturnInstr = dyn_cast(User); - if (isa(ReturnInstr) || - isa(ReturnInstr)) { - BasicBlock *ReturnInstrBB = ReturnInstr->getParent(); - ReturnInstr->eraseFromParent(); - new UnreachableInst(ReturnInstrBB->getContext(), ReturnInstrBB); - } - } -} - void llvm::detachDeadBlocks( ArrayRef BBs, SmallVectorImpl *Updates, @@ -88,36 +75,7 @@ void llvm::detachDeadBlocks( // Zap all the instructions in the block. while (!BB->empty()) { Instruction &I = BB->back(); - // Exception handling funclets need to be explicitly addressed. - // These funclets must begin with cleanuppad or catchpad and end with - // cleanupred or catchret. The return instructions can be in different - // basic blocks than the pad instruction. If we would only delete the - // first block, the we would have possible cleanupret and catchret - // instructions with poison arguments, which wouldn't be valid. - if (isa(I)) - replaceFuncletPadsRetWithUnreachable(I); - - // Catchswitch instructions have handlers, that must be catchpads and - // an unwind label, that is either a catchpad or catchswitch. - if (CatchSwitchInst *CSI = dyn_cast(&I)) { - // Iterating over the handlers and the unwind basic block and processing - // catchpads. If the unwind label is a catchswitch, we just replace the - // label with poison later on. 
- for (unsigned I = 0; I < CSI->getNumSuccessors(); I++) { - BasicBlock *SucBlock = CSI->getSuccessor(I); - Instruction &SucFstInst = *(SucBlock->getFirstNonPHIIt()); - if (isa(SucFstInst)) { - replaceFuncletPadsRetWithUnreachable(SucFstInst); - // There may be catchswitch instructions using the catchpad. - // Just replace those with poison. - if (!SucFstInst.use_empty()) - SucFstInst.replaceAllUsesWith( - PoisonValue::get(SucFstInst.getType())); - SucFstInst.eraseFromParent(); - } - } - } - + // If this instruction is used, replace uses with an arbitrary value. // Because control flow can't get here, we don't care what we replace the // value with. Note that since this block is unreachable, and all values // contained within it must dominate their uses, that all uses will diff --git a/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll b/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll deleted file mode 100644 index d2fccae6770db..0000000000000 --- a/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll +++ /dev/null @@ -1,169 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s - -; cleanuppad/cleanupret - -define void @unreachable_cleanuppad_linear(i64 %shapes.1) personality ptr null { -; CHECK-LABEL: define void @unreachable_cleanuppad_linear( -; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { -; CHECK-NEXT: [[START:.*:]] -; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] -; CHECK-NEXT: ret void -; -start: - %_7 = icmp ult i64 0, %shapes.1 - ret void - -funclet: - %cleanuppad = cleanuppad within none [] - br label %funclet_end - -funclet_end: - cleanupret from %cleanuppad unwind to caller -} - -define void @unreachable_cleanuppad_multiple_predecessors(i64 %shapes.1) personality ptr null { -; CHECK-LABEL: define void @unreachable_cleanuppad_multiple_predecessors( -; 
CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { -; CHECK-NEXT: [[START:.*:]] -; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] -; CHECK-NEXT: ret void -; -start: - %_7 = icmp ult i64 0, %shapes.1 - ret void - -funclet: - %cleanuppad = cleanuppad within none [] - switch i64 %shapes.1, label %otherwise [ i64 0, label %one - i64 1, label %two - i64 42, label %three ] -one: - br label %funclet_end - -two: - br label %funclet_end - -three: - br label %funclet_end - -otherwise: - br label %funclet_end - -funclet_end: - cleanupret from %cleanuppad unwind to caller -} - -; catchpad/catchret - -define void @unreachable_catchpad_linear(i64 %shapes.1) personality ptr null { -; CHECK-LABEL: define void @unreachable_catchpad_linear( -; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { -; CHECK-NEXT: [[START:.*:]] -; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] -; CHECK-NEXT: ret void -; -start: - %_7 = icmp ult i64 0, %shapes.1 - ret void - -dispatch: - %cs = catchswitch within none [label %funclet] unwind to caller - -funclet: - %cleanuppad = catchpad within %cs [] - br label %funclet_end - - -funclet_end: - catchret from %cleanuppad to label %unreachable - -unreachable: - unreachable -} - -define void @unreachable_catchpad_multiple_predecessors(i64 %shapes.1) personality ptr null { -; CHECK-LABEL: define void @unreachable_catchpad_multiple_predecessors( -; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { -; CHECK-NEXT: [[START:.*:]] -; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] -; CHECK-NEXT: ret void -; -start: - %_7 = icmp ult i64 0, %shapes.1 - ret void - -dispatch: - %cs = catchswitch within none [label %funclet] unwind to caller - -funclet: - %cleanuppad = catchpad within %cs [] - switch i64 %shapes.1, label %otherwise [ i64 0, label %one - i64 1, label %two - i64 42, label %three ] -one: - br label %funclet_end - -two: - br label %funclet_end - -three: - br label %funclet_end - -otherwise: - br label %funclet_end - 
-funclet_end: - catchret from %cleanuppad to label %unreachable - -unreachable: - unreachable -} - -; Issue reproducer - -define void @gh148052(i64 %shapes.1) personality ptr null { -; CHECK-LABEL: define void @gh148052( -; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null { -; CHECK-NEXT: [[START:.*:]] -; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]] -; CHECK-NEXT: call void @llvm.assume(i1 [[_7]]) -; CHECK-NEXT: ret void -; -start: - %_7 = icmp ult i64 0, %shapes.1 - br i1 %_7, label %bb1, label %panic - -bb1: - %_11 = icmp ult i64 0, %shapes.1 - br i1 %_11, label %bb3, label %panic1 - -panic: - unreachable - -bb3: - ret void - -panic1: - invoke void @func(i64 0, i64 0, ptr null) - to label %unreachable unwind label %funclet_bb14 - -funclet_bb14: - %cleanuppad = cleanuppad within none [] - br label %bb13 - -unreachable: - unreachable - -bb10: - cleanupret from %cleanuppad5 unwind to caller - -funclet_bb10: - %cleanuppad5 = cleanuppad within none [] - br label %bb10 - -bb13: - cleanupret from %cleanuppad unwind label %funclet_bb10 -} - -declare void @func(i64, i64, ptr) From 43384913b636854b92c7de9e326f879a1993f445 Mon Sep 17 00:00:00 2001 From: Victor Chernyakin Date: Sun, 14 Sep 2025 14:06:20 -0600 Subject: [PATCH 270/734] [clang-tidy][NFC] Switch to new file header style (#158497) As decided in #118553 and following up #153942. `rename_check.py` has small logic changes, everything else is a mechanical replacement. 
--- clang-tools-extra/clang-tidy/ClangTidy.cpp | 2 +- clang-tools-extra/clang-tidy/ClangTidy.h | 2 +- .../clang-tidy/ClangTidyCheck.cpp | 2 +- clang-tools-extra/clang-tidy/ClangTidyCheck.h | 2 +- .../ClangTidyDiagnosticConsumer.cpp | 2 +- .../clang-tidy/ClangTidyDiagnosticConsumer.h | 2 +- .../clang-tidy/ClangTidyModule.cpp | 2 +- .../clang-tidy/ClangTidyModule.h | 2 +- .../clang-tidy/ClangTidyModuleRegistry.h | 2 +- .../clang-tidy/ClangTidyOptions.cpp | 2 +- .../clang-tidy/ClangTidyOptions.h | 2 +- .../clang-tidy/ClangTidyProfiling.cpp | 2 +- .../clang-tidy/ClangTidyProfiling.h | 2 +- .../clang-tidy/FileExtensionsSet.h | 2 +- clang-tools-extra/clang-tidy/GlobList.cpp | 2 +- clang-tools-extra/clang-tidy/GlobList.h | 2 +- .../clang-tidy/NoLintDirectiveHandler.cpp | 2 +- .../clang-tidy/NoLintDirectiveHandler.h | 2 +- .../clang-tidy/abseil/AbseilTidyModule.cpp | 2 +- .../clang-tidy/abseil/CleanupCtadCheck.cpp | 2 +- .../clang-tidy/abseil/CleanupCtadCheck.h | 2 +- .../abseil/DurationAdditionCheck.cpp | 2 +- .../clang-tidy/abseil/DurationAdditionCheck.h | 2 +- .../abseil/DurationComparisonCheck.cpp | 2 +- .../abseil/DurationComparisonCheck.h | 2 +- .../abseil/DurationConversionCastCheck.cpp | 2 +- .../abseil/DurationConversionCastCheck.h | 2 +- .../abseil/DurationDivisionCheck.cpp | 2 +- .../clang-tidy/abseil/DurationDivisionCheck.h | 2 +- .../abseil/DurationFactoryFloatCheck.cpp | 2 +- .../abseil/DurationFactoryFloatCheck.h | 2 +- .../abseil/DurationFactoryScaleCheck.cpp | 2 +- .../abseil/DurationFactoryScaleCheck.h | 2 +- .../clang-tidy/abseil/DurationRewriter.cpp | 2 +- .../clang-tidy/abseil/DurationRewriter.h | 2 +- .../abseil/DurationSubtractionCheck.cpp | 2 +- .../abseil/DurationSubtractionCheck.h | 2 +- .../DurationUnnecessaryConversionCheck.cpp | 3 +- .../DurationUnnecessaryConversionCheck.h | 2 +- .../abseil/FasterStrsplitDelimiterCheck.cpp | 2 +- .../abseil/FasterStrsplitDelimiterCheck.h | 2 +- .../abseil/NoInternalDependenciesCheck.cpp | 2 +- 
.../abseil/NoInternalDependenciesCheck.h | 2 +- .../clang-tidy/abseil/NoNamespaceCheck.cpp | 2 +- .../clang-tidy/abseil/NoNamespaceCheck.h | 2 +- .../abseil/RedundantStrcatCallsCheck.cpp | 2 +- .../abseil/RedundantStrcatCallsCheck.h | 2 +- .../clang-tidy/abseil/StrCatAppendCheck.cpp | 2 +- .../clang-tidy/abseil/StrCatAppendCheck.h | 2 +- .../abseil/StringFindStartswithCheck.cpp | 2 +- .../abseil/StringFindStartswithCheck.h | 2 +- .../abseil/StringFindStrContainsCheck.cpp | 2 +- .../abseil/StringFindStrContainsCheck.h | 2 +- .../clang-tidy/abseil/TimeComparisonCheck.cpp | 3 +- .../clang-tidy/abseil/TimeComparisonCheck.h | 2 +- .../abseil/TimeSubtractionCheck.cpp | 2 +- .../clang-tidy/abseil/TimeSubtractionCheck.h | 2 +- .../UpgradeDurationConversionsCheck.cpp | 2 +- .../abseil/UpgradeDurationConversionsCheck.h | 2 +- clang-tools-extra/clang-tidy/add_new_check.py | 2 +- .../clang-tidy/altera/AlteraTidyModule.cpp | 2 +- .../altera/IdDependentBackwardBranchCheck.cpp | 2 +- .../altera/IdDependentBackwardBranchCheck.h | 2 +- .../altera/KernelNameRestrictionCheck.cpp | 2 +- .../altera/KernelNameRestrictionCheck.h | 2 +- .../altera/SingleWorkItemBarrierCheck.cpp | 2 +- .../altera/SingleWorkItemBarrierCheck.h | 2 +- .../altera/StructPackAlignCheck.cpp | 2 +- .../clang-tidy/altera/StructPackAlignCheck.h | 2 +- .../clang-tidy/altera/UnrollLoopsCheck.cpp | 2 +- .../clang-tidy/altera/UnrollLoopsCheck.h | 2 +- .../clang-tidy/android/AndroidTidyModule.cpp | 2 +- .../android/CloexecAccept4Check.cpp | 2 +- .../clang-tidy/android/CloexecAccept4Check.h | 2 +- .../clang-tidy/android/CloexecAcceptCheck.cpp | 2 +- .../clang-tidy/android/CloexecAcceptCheck.h | 2 +- .../clang-tidy/android/CloexecCheck.cpp | 2 +- .../clang-tidy/android/CloexecCheck.h | 2 +- .../clang-tidy/android/CloexecCreatCheck.cpp | 2 +- .../clang-tidy/android/CloexecCreatCheck.h | 2 +- .../clang-tidy/android/CloexecDupCheck.cpp | 2 +- .../clang-tidy/android/CloexecDupCheck.h | 2 +- 
.../android/CloexecEpollCreate1Check.cpp | 2 +- .../android/CloexecEpollCreate1Check.h | 2 +- .../android/CloexecEpollCreateCheck.cpp | 2 +- .../android/CloexecEpollCreateCheck.h | 2 +- .../clang-tidy/android/CloexecFopenCheck.cpp | 2 +- .../clang-tidy/android/CloexecFopenCheck.h | 2 +- .../android/CloexecInotifyInit1Check.cpp | 2 +- .../android/CloexecInotifyInit1Check.h | 2 +- .../android/CloexecInotifyInitCheck.cpp | 2 +- .../android/CloexecInotifyInitCheck.h | 2 +- .../android/CloexecMemfdCreateCheck.cpp | 2 +- .../android/CloexecMemfdCreateCheck.h | 2 +- .../clang-tidy/android/CloexecOpenCheck.cpp | 2 +- .../clang-tidy/android/CloexecOpenCheck.h | 2 +- .../clang-tidy/android/CloexecPipe2Check.cpp | 2 +- .../clang-tidy/android/CloexecPipe2Check.h | 2 +- .../clang-tidy/android/CloexecPipeCheck.cpp | 2 +- .../clang-tidy/android/CloexecPipeCheck.h | 2 +- .../clang-tidy/android/CloexecSocketCheck.cpp | 2 +- .../clang-tidy/android/CloexecSocketCheck.h | 2 +- .../ComparisonInTempFailureRetryCheck.cpp | 2 +- .../ComparisonInTempFailureRetryCheck.h | 2 +- .../clang-tidy/boost/BoostTidyModule.cpp | 2 +- .../clang-tidy/boost/UseRangesCheck.cpp | 2 +- .../clang-tidy/boost/UseRangesCheck.h | 2 +- .../clang-tidy/boost/UseToStringCheck.cpp | 2 +- .../clang-tidy/boost/UseToStringCheck.h | 2 +- .../bugprone/ArgumentCommentCheck.cpp | 2 +- .../bugprone/ArgumentCommentCheck.h | 2 +- .../bugprone/AssertSideEffectCheck.cpp | 2 +- .../bugprone/AssertSideEffectCheck.h | 2 +- .../bugprone/AssignmentInIfConditionCheck.cpp | 2 +- .../bugprone/AssignmentInIfConditionCheck.h | 2 +- .../bugprone/BadSignalToKillThreadCheck.cpp | 2 +- .../bugprone/BadSignalToKillThreadCheck.h | 2 +- .../bugprone/BitwisePointerCastCheck.cpp | 2 +- .../bugprone/BitwisePointerCastCheck.h | 2 +- .../BoolPointerImplicitConversionCheck.cpp | 2 +- .../BoolPointerImplicitConversionCheck.h | 2 +- .../clang-tidy/bugprone/BranchCloneCheck.cpp | 2 +- .../clang-tidy/bugprone/BranchCloneCheck.h | 2 +- 
.../bugprone/BugproneTidyModule.cpp | 2 +- .../CapturingThisInMemberVariableCheck.cpp | 2 +- .../CapturingThisInMemberVariableCheck.h | 2 +- .../bugprone/CastingThroughVoidCheck.cpp | 2 +- .../bugprone/CastingThroughVoidCheck.h | 2 +- .../bugprone/ChainedComparisonCheck.cpp | 2 +- .../bugprone/ChainedComparisonCheck.h | 2 +- ...arePointerToMemberVirtualFunctionCheck.cpp | 2 +- .../bugprone/CopyConstructorInitCheck.cpp | 2 +- .../bugprone/CopyConstructorInitCheck.h | 2 +- .../CrtpConstructorAccessibilityCheck.cpp | 2 +- .../CrtpConstructorAccessibilityCheck.h | 2 +- .../bugprone/DanglingHandleCheck.cpp | 2 +- .../clang-tidy/bugprone/DanglingHandleCheck.h | 2 +- .../DynamicStaticInitializersCheck.cpp | 2 +- .../bugprone/DynamicStaticInitializersCheck.h | 2 +- .../EasilySwappableParametersCheck.cpp | 2 +- .../bugprone/EasilySwappableParametersCheck.h | 2 +- .../clang-tidy/bugprone/EmptyCatchCheck.cpp | 2 +- .../clang-tidy/bugprone/EmptyCatchCheck.h | 2 +- .../bugprone/ExceptionEscapeCheck.cpp | 2 +- .../bugprone/ExceptionEscapeCheck.h | 2 +- .../clang-tidy/bugprone/FoldInitTypeCheck.cpp | 2 +- .../clang-tidy/bugprone/FoldInitTypeCheck.h | 2 +- .../ForwardDeclarationNamespaceCheck.cpp | 2 +- .../ForwardDeclarationNamespaceCheck.h | 2 +- .../ForwardingReferenceOverloadCheck.cpp | 2 +- .../ForwardingReferenceOverloadCheck.h | 2 +- ...citWideningOfMultiplicationResultCheck.cpp | 2 +- ...licitWideningOfMultiplicationResultCheck.h | 2 +- .../bugprone/InaccurateEraseCheck.cpp | 2 +- .../bugprone/InaccurateEraseCheck.h | 2 +- .../bugprone/IncDecInConditionsCheck.cpp | 2 +- .../bugprone/IncDecInConditionsCheck.h | 2 +- .../bugprone/IncorrectEnableIfCheck.cpp | 2 +- .../bugprone/IncorrectEnableIfCheck.h | 2 +- .../IncorrectEnableSharedFromThisCheck.cpp | 2 +- .../IncorrectEnableSharedFromThisCheck.h | 2 +- .../bugprone/IncorrectRoundingsCheck.cpp | 2 +- .../bugprone/IncorrectRoundingsCheck.h | 2 +- .../clang-tidy/bugprone/InfiniteLoopCheck.cpp | 2 +- 
.../clang-tidy/bugprone/InfiniteLoopCheck.h | 2 +- .../bugprone/IntegerDivisionCheck.cpp | 2 +- .../bugprone/IntegerDivisionCheck.h | 2 +- .../InvalidEnumDefaultInitializationCheck.cpp | 2 +- .../InvalidEnumDefaultInitializationCheck.h | 2 +- .../bugprone/LambdaFunctionNameCheck.cpp | 2 +- .../bugprone/LambdaFunctionNameCheck.h | 2 +- .../bugprone/MacroParenthesesCheck.cpp | 2 +- .../bugprone/MacroParenthesesCheck.h | 2 +- .../MacroRepeatedSideEffectsCheck.cpp | 2 +- .../bugprone/MacroRepeatedSideEffectsCheck.h | 2 +- .../MisleadingSetterOfReferenceCheck.cpp | 2 +- .../MisleadingSetterOfReferenceCheck.h | 2 +- .../MisplacedOperatorInStrlenInAllocCheck.cpp | 2 +- .../MisplacedOperatorInStrlenInAllocCheck.h | 2 +- ...MisplacedPointerArithmeticInAllocCheck.cpp | 2 +- .../MisplacedPointerArithmeticInAllocCheck.h | 2 +- .../bugprone/MisplacedWideningCastCheck.cpp | 2 +- .../bugprone/MisplacedWideningCastCheck.h | 2 +- .../bugprone/MoveForwardingReferenceCheck.cpp | 2 +- .../bugprone/MoveForwardingReferenceCheck.h | 2 +- ...ltiLevelImplicitPointerConversionCheck.cpp | 2 +- ...MultiLevelImplicitPointerConversionCheck.h | 2 +- .../MultipleNewInOneExpressionCheck.cpp | 2 +- .../MultipleNewInOneExpressionCheck.h | 2 +- .../bugprone/MultipleStatementMacroCheck.cpp | 2 +- .../bugprone/MultipleStatementMacroCheck.h | 2 +- .../bugprone/NarrowingConversionsCheck.cpp | 2 +- .../bugprone/NarrowingConversionsCheck.h | 2 +- .../clang-tidy/bugprone/NoEscapeCheck.cpp | 2 +- .../clang-tidy/bugprone/NoEscapeCheck.h | 2 +- .../NonZeroEnumToBoolConversionCheck.cpp | 2 +- .../NonZeroEnumToBoolConversionCheck.h | 2 +- ...eterministicPointerIterationOrderCheck.cpp | 2 +- .../bugprone/NotNullTerminatedResultCheck.cpp | 2 +- .../bugprone/NotNullTerminatedResultCheck.h | 2 +- .../bugprone/OptionalValueConversionCheck.cpp | 2 +- .../bugprone/OptionalValueConversionCheck.h | 2 +- .../bugprone/ParentVirtualCallCheck.cpp | 2 +- .../bugprone/ParentVirtualCallCheck.h | 2 +- 
...nterArithmeticOnPolymorphicObjectCheck.cpp | 2 +- ...ointerArithmeticOnPolymorphicObjectCheck.h | 2 +- .../clang-tidy/bugprone/PosixReturnCheck.cpp | 2 +- .../clang-tidy/bugprone/PosixReturnCheck.h | 2 +- .../RedundantBranchConditionCheck.cpp | 2 +- .../bugprone/RedundantBranchConditionCheck.h | 2 +- .../bugprone/ReservedIdentifierCheck.cpp | 2 +- .../bugprone/ReservedIdentifierCheck.h | 2 +- .../ReturnConstRefFromParameterCheck.cpp | 2 +- .../ReturnConstRefFromParameterCheck.h | 2 +- .../bugprone/SharedPtrArrayMismatchCheck.cpp | 2 +- .../bugprone/SharedPtrArrayMismatchCheck.h | 2 +- .../bugprone/SignalHandlerCheck.cpp | 2 +- .../clang-tidy/bugprone/SignalHandlerCheck.h | 2 +- .../bugprone/SignedCharMisuseCheck.cpp | 2 +- .../bugprone/SignedCharMisuseCheck.h | 2 +- .../bugprone/SizeofContainerCheck.cpp | 2 +- .../bugprone/SizeofContainerCheck.h | 2 +- .../bugprone/SizeofExpressionCheck.cpp | 2 +- .../bugprone/SizeofExpressionCheck.h | 2 +- .../bugprone/SmartPtrArrayMismatchCheck.cpp | 2 +- .../bugprone/SmartPtrArrayMismatchCheck.h | 2 +- .../SpuriouslyWakeUpFunctionsCheck.cpp | 2 +- .../bugprone/SpuriouslyWakeUpFunctionsCheck.h | 2 +- .../bugprone/StandaloneEmptyCheck.cpp | 2 +- .../bugprone/StandaloneEmptyCheck.h | 2 +- .../bugprone/StringConstructorCheck.cpp | 2 +- .../bugprone/StringConstructorCheck.h | 2 +- .../bugprone/StringIntegerAssignmentCheck.cpp | 2 +- .../bugprone/StringIntegerAssignmentCheck.h | 2 +- .../StringLiteralWithEmbeddedNulCheck.cpp | 2 +- .../StringLiteralWithEmbeddedNulCheck.h | 2 +- .../bugprone/StringviewNullptrCheck.cpp | 2 +- .../bugprone/StringviewNullptrCheck.h | 2 +- .../bugprone/SuspiciousEnumUsageCheck.cpp | 2 +- .../bugprone/SuspiciousEnumUsageCheck.h | 2 +- .../bugprone/SuspiciousIncludeCheck.cpp | 2 +- .../bugprone/SuspiciousIncludeCheck.h | 2 +- .../SuspiciousMemoryComparisonCheck.cpp | 2 +- .../SuspiciousMemoryComparisonCheck.h | 2 +- .../bugprone/SuspiciousMemsetUsageCheck.cpp | 2 +- 
.../bugprone/SuspiciousMemsetUsageCheck.h | 2 +- .../bugprone/SuspiciousMissingCommaCheck.cpp | 2 +- .../bugprone/SuspiciousMissingCommaCheck.h | 2 +- .../bugprone/SuspiciousReallocUsageCheck.cpp | 2 +- .../bugprone/SuspiciousReallocUsageCheck.h | 2 +- .../bugprone/SuspiciousSemicolonCheck.cpp | 2 +- .../bugprone/SuspiciousSemicolonCheck.h | 2 +- .../bugprone/SuspiciousStringCompareCheck.cpp | 2 +- .../bugprone/SuspiciousStringCompareCheck.h | 2 +- .../SuspiciousStringviewDataUsageCheck.cpp | 2 +- .../SuspiciousStringviewDataUsageCheck.h | 2 +- .../bugprone/SwappedArgumentsCheck.cpp | 2 +- .../bugprone/SwappedArgumentsCheck.h | 2 +- .../SwitchMissingDefaultCaseCheck.cpp | 2 +- .../bugprone/SwitchMissingDefaultCaseCheck.h | 2 +- .../bugprone/TaggedUnionMemberCountCheck.cpp | 2 +- .../bugprone/TaggedUnionMemberCountCheck.h | 2 +- .../bugprone/TerminatingContinueCheck.cpp | 2 +- .../bugprone/TerminatingContinueCheck.h | 2 +- .../bugprone/ThrowKeywordMissingCheck.cpp | 2 +- .../bugprone/ThrowKeywordMissingCheck.h | 2 +- .../bugprone/TooSmallLoopVariableCheck.cpp | 2 +- .../bugprone/TooSmallLoopVariableCheck.h | 2 +- .../bugprone/UncheckedOptionalAccessCheck.cpp | 2 +- .../bugprone/UncheckedOptionalAccessCheck.h | 2 +- .../UndefinedMemoryManipulationCheck.cpp | 2 +- .../UndefinedMemoryManipulationCheck.h | 2 +- .../bugprone/UndelegatedConstructorCheck.cpp | 2 +- .../bugprone/UndelegatedConstructorCheck.h | 2 +- .../bugprone/UnhandledExceptionAtNewCheck.cpp | 2 +- .../bugprone/UnhandledExceptionAtNewCheck.h | 2 +- .../bugprone/UnhandledSelfAssignmentCheck.cpp | 2 +- .../bugprone/UnhandledSelfAssignmentCheck.h | 2 +- .../UnintendedCharOstreamOutputCheck.cpp | 2 +- .../UnintendedCharOstreamOutputCheck.h | 2 +- .../bugprone/UniquePtrArrayMismatchCheck.cpp | 2 +- .../bugprone/UniquePtrArrayMismatchCheck.h | 2 +- .../bugprone/UnsafeFunctionsCheck.cpp | 2 +- .../bugprone/UnsafeFunctionsCheck.h | 2 +- .../UnusedLocalNonTrivialVariableCheck.cpp | 2 +- 
.../UnusedLocalNonTrivialVariableCheck.h | 2 +- .../clang-tidy/bugprone/UnusedRaiiCheck.cpp | 2 +- .../clang-tidy/bugprone/UnusedRaiiCheck.h | 2 +- .../bugprone/UnusedReturnValueCheck.cpp | 2 +- .../bugprone/UnusedReturnValueCheck.h | 2 +- .../clang-tidy/bugprone/UseAfterMoveCheck.cpp | 2 +- .../clang-tidy/bugprone/UseAfterMoveCheck.h | 2 +- .../bugprone/VirtualNearMissCheck.cpp | 2 +- .../bugprone/VirtualNearMissCheck.h | 2 +- .../clang-tidy/cert/CERTTidyModule.cpp | 2 +- .../clang-tidy/cert/CommandProcessorCheck.cpp | 2 +- .../clang-tidy/cert/CommandProcessorCheck.h | 2 +- .../cert/DefaultOperatorNewAlignmentCheck.cpp | 2 +- .../cert/DefaultOperatorNewAlignmentCheck.h | 2 +- .../cert/DontModifyStdNamespaceCheck.cpp | 2 +- .../cert/DontModifyStdNamespaceCheck.h | 2 +- .../clang-tidy/cert/FloatLoopCounter.cpp | 2 +- .../clang-tidy/cert/FloatLoopCounter.h | 2 +- .../cert/LimitedRandomnessCheck.cpp | 2 +- .../clang-tidy/cert/LimitedRandomnessCheck.h | 2 +- .../clang-tidy/cert/MutatingCopyCheck.cpp | 2 +- .../clang-tidy/cert/MutatingCopyCheck.h | 2 +- .../NonTrivialTypesLibcMemoryCallsCheck.cpp | 2 +- .../NonTrivialTypesLibcMemoryCallsCheck.h | 2 +- .../ProperlySeededRandomGeneratorCheck.cpp | 2 +- .../cert/ProperlySeededRandomGeneratorCheck.h | 2 +- .../clang-tidy/cert/SetLongJmpCheck.cpp | 2 +- .../clang-tidy/cert/SetLongJmpCheck.h | 2 +- .../cert/StaticObjectExceptionCheck.cpp | 2 +- .../cert/StaticObjectExceptionCheck.h | 2 +- .../cert/ThrownExceptionTypeCheck.cpp | 2 +- .../cert/ThrownExceptionTypeCheck.h | 2 +- .../cert/VariadicFunctionDefCheck.cpp | 2 +- .../cert/VariadicFunctionDefCheck.h | 2 +- .../concurrency/ConcurrencyTidyModule.cpp | 2 +- .../clang-tidy/concurrency/MtUnsafeCheck.cpp | 2 +- .../clang-tidy/concurrency/MtUnsafeCheck.h | 2 +- .../ThreadCanceltypeAsynchronousCheck.cpp | 2 +- .../ThreadCanceltypeAsynchronousCheck.h | 2 +- .../AvoidCapturingLambdaCoroutinesCheck.cpp | 2 +- .../AvoidCapturingLambdaCoroutinesCheck.h | 2 +- 
.../AvoidConstOrRefDataMembersCheck.cpp | 2 +- .../AvoidConstOrRefDataMembersCheck.h | 2 +- .../cppcoreguidelines/AvoidDoWhileCheck.cpp | 2 +- .../cppcoreguidelines/AvoidDoWhileCheck.h | 2 +- .../cppcoreguidelines/AvoidGotoCheck.cpp | 2 +- .../cppcoreguidelines/AvoidGotoCheck.h | 2 +- .../AvoidNonConstGlobalVariablesCheck.cpp | 2 +- .../AvoidNonConstGlobalVariablesCheck.h | 2 +- ...AvoidReferenceCoroutineParametersCheck.cpp | 2 +- .../AvoidReferenceCoroutineParametersCheck.h | 2 +- .../CppCoreGuidelinesTidyModule.cpp | 2 +- .../cppcoreguidelines/InitVariablesCheck.cpp | 2 +- .../cppcoreguidelines/InitVariablesCheck.h | 2 +- .../InterfacesGlobalInitCheck.cpp | 2 +- .../InterfacesGlobalInitCheck.h | 2 +- .../cppcoreguidelines/MacroUsageCheck.cpp | 2 +- .../cppcoreguidelines/MacroUsageCheck.h | 2 +- .../MisleadingCaptureDefaultByValueCheck.cpp | 2 +- .../MisleadingCaptureDefaultByValueCheck.h | 2 +- .../MissingStdForwardCheck.cpp | 2 +- .../MissingStdForwardCheck.h | 2 +- .../cppcoreguidelines/NoMallocCheck.cpp | 2 +- .../cppcoreguidelines/NoMallocCheck.h | 2 +- .../NoSuspendWithLockCheck.cpp | 2 +- .../NoSuspendWithLockCheck.h | 2 +- .../cppcoreguidelines/OwningMemoryCheck.cpp | 2 +- .../cppcoreguidelines/OwningMemoryCheck.h | 2 +- .../PreferMemberInitializerCheck.cpp | 2 +- .../PreferMemberInitializerCheck.h | 2 +- .../ProBoundsArrayToPointerDecayCheck.cpp | 2 +- .../ProBoundsArrayToPointerDecayCheck.h | 2 +- ...ProBoundsAvoidUncheckedContainerAccess.cpp | 2 +- .../ProBoundsAvoidUncheckedContainerAccess.h | 2 +- .../ProBoundsConstantArrayIndexCheck.cpp | 2 +- .../ProBoundsConstantArrayIndexCheck.h | 2 +- .../ProBoundsPointerArithmeticCheck.cpp | 2 +- .../ProBoundsPointerArithmeticCheck.h | 2 +- .../ProTypeConstCastCheck.cpp | 2 +- .../cppcoreguidelines/ProTypeConstCastCheck.h | 2 +- .../ProTypeCstyleCastCheck.cpp | 2 +- .../ProTypeCstyleCastCheck.h | 2 +- .../ProTypeMemberInitCheck.cpp | 2 +- .../ProTypeMemberInitCheck.h | 2 +- .../ProTypeReinterpretCastCheck.cpp | 
2 +- .../ProTypeReinterpretCastCheck.h | 2 +- .../ProTypeStaticCastDowncastCheck.cpp | 2 +- .../ProTypeStaticCastDowncastCheck.h | 2 +- .../ProTypeUnionAccessCheck.cpp | 2 +- .../ProTypeUnionAccessCheck.h | 2 +- .../cppcoreguidelines/ProTypeVarargCheck.cpp | 2 +- .../cppcoreguidelines/ProTypeVarargCheck.h | 2 +- .../RvalueReferenceParamNotMovedCheck.cpp | 2 +- .../RvalueReferenceParamNotMovedCheck.h | 2 +- .../cppcoreguidelines/SlicingCheck.cpp | 2 +- .../cppcoreguidelines/SlicingCheck.h | 2 +- .../SpecialMemberFunctionsCheck.cpp | 2 +- .../SpecialMemberFunctionsCheck.h | 2 +- .../cppcoreguidelines/UseEnumClassCheck.cpp | 2 +- .../cppcoreguidelines/UseEnumClassCheck.h | 2 +- .../VirtualClassDestructorCheck.cpp | 2 +- .../VirtualClassDestructorCheck.h | 2 +- .../clang-tidy/darwin/AvoidSpinlockCheck.cpp | 2 +- .../clang-tidy/darwin/AvoidSpinlockCheck.h | 2 +- .../clang-tidy/darwin/DarwinTidyModule.cpp | 2 +- .../darwin/DispatchOnceNonstaticCheck.cpp | 2 +- .../darwin/DispatchOnceNonstaticCheck.h | 2 +- .../fuchsia/DefaultArgumentsCallsCheck.cpp | 2 +- .../fuchsia/DefaultArgumentsCallsCheck.h | 2 +- .../DefaultArgumentsDeclarationsCheck.cpp | 2 +- .../DefaultArgumentsDeclarationsCheck.h | 2 +- .../clang-tidy/fuchsia/FuchsiaTidyModule.cpp | 2 +- .../fuchsia/MultipleInheritanceCheck.cpp | 2 +- .../fuchsia/MultipleInheritanceCheck.h | 2 +- .../fuchsia/OverloadedOperatorCheck.cpp | 2 +- .../fuchsia/OverloadedOperatorCheck.h | 2 +- .../StaticallyConstructedObjectsCheck.cpp | 2 +- .../StaticallyConstructedObjectsCheck.h | 2 +- .../fuchsia/TrailingReturnCheck.cpp | 2 +- .../clang-tidy/fuchsia/TrailingReturnCheck.h | 2 +- .../fuchsia/VirtualInheritanceCheck.cpp | 2 +- .../fuchsia/VirtualInheritanceCheck.h | 2 +- .../google/AvoidCStyleCastsCheck.cpp | 2 +- .../clang-tidy/google/AvoidCStyleCastsCheck.h | 2 +- .../google/AvoidNSObjectNewCheck.cpp | 2 +- .../clang-tidy/google/AvoidNSObjectNewCheck.h | 2 +- .../AvoidThrowingObjCExceptionCheck.cpp | 2 +- 
.../google/AvoidThrowingObjCExceptionCheck.h | 2 +- .../AvoidUnderscoreInGoogletestNameCheck.cpp | 2 +- .../AvoidUnderscoreInGoogletestNameCheck.h | 2 +- .../google/DefaultArgumentsCheck.cpp | 2 +- .../clang-tidy/google/DefaultArgumentsCheck.h | 2 +- .../google/ExplicitConstructorCheck.cpp | 2 +- .../google/ExplicitConstructorCheck.h | 2 +- .../google/ExplicitMakePairCheck.cpp | 2 +- .../clang-tidy/google/ExplicitMakePairCheck.h | 2 +- .../clang-tidy/google/FunctionNamingCheck.cpp | 2 +- .../clang-tidy/google/FunctionNamingCheck.h | 2 +- .../google/GlobalNamesInHeadersCheck.cpp | 2 +- .../google/GlobalNamesInHeadersCheck.h | 2 +- .../google/GlobalVariableDeclarationCheck.cpp | 2 +- .../google/GlobalVariableDeclarationCheck.h | 2 +- .../clang-tidy/google/GoogleTidyModule.cpp | 2 +- .../clang-tidy/google/IntegerTypesCheck.cpp | 2 +- .../clang-tidy/google/IntegerTypesCheck.h | 2 +- .../google/OverloadedUnaryAndCheck.cpp | 2 +- .../google/OverloadedUnaryAndCheck.h | 2 +- .../clang-tidy/google/TodoCommentCheck.cpp | 2 +- .../clang-tidy/google/TodoCommentCheck.h | 2 +- .../google/UnnamedNamespaceInHeaderCheck.cpp | 2 +- .../google/UnnamedNamespaceInHeaderCheck.h | 2 +- .../google/UpgradeGoogletestCaseCheck.cpp | 2 +- .../google/UpgradeGoogletestCaseCheck.h | 2 +- .../google/UsingNamespaceDirectiveCheck.cpp | 2 +- .../google/UsingNamespaceDirectiveCheck.h | 2 +- .../hicpp/ExceptionBaseclassCheck.cpp | 2 +- .../hicpp/ExceptionBaseclassCheck.h | 2 +- .../clang-tidy/hicpp/HICPPTidyModule.cpp | 2 +- .../hicpp/IgnoredRemoveResultCheck.cpp | 2 +- .../hicpp/IgnoredRemoveResultCheck.h | 2 +- .../hicpp/MultiwayPathsCoveredCheck.cpp | 2 +- .../hicpp/MultiwayPathsCoveredCheck.h | 2 +- .../clang-tidy/hicpp/NoAssemblerCheck.cpp | 2 +- .../clang-tidy/hicpp/NoAssemblerCheck.h | 2 +- .../clang-tidy/hicpp/SignedBitwiseCheck.cpp | 2 +- .../clang-tidy/hicpp/SignedBitwiseCheck.h | 2 +- .../linuxkernel/LinuxKernelTidyModule.cpp | 2 +- .../linuxkernel/MustCheckErrsCheck.cpp | 2 +- 
.../linuxkernel/MustCheckErrsCheck.h | 2 +- .../clang-tidy/llvm/HeaderGuardCheck.cpp | 2 +- .../clang-tidy/llvm/HeaderGuardCheck.h | 2 +- .../clang-tidy/llvm/IncludeOrderCheck.cpp | 2 +- .../clang-tidy/llvm/IncludeOrderCheck.h | 2 +- .../clang-tidy/llvm/LLVMTidyModule.cpp | 2 +- .../PreferIsaOrDynCastInConditionalsCheck.cpp | 3 +- .../PreferIsaOrDynCastInConditionalsCheck.h | 2 +- .../llvm/PreferRegisterOverUnsignedCheck.cpp | 2 +- .../llvm/PreferRegisterOverUnsignedCheck.h | 2 +- ...referStaticOverAnonymousNamespaceCheck.cpp | 2 +- .../PreferStaticOverAnonymousNamespaceCheck.h | 2 +- .../clang-tidy/llvm/TwineLocalCheck.cpp | 2 +- .../clang-tidy/llvm/TwineLocalCheck.h | 2 +- .../llvm/UseNewMLIROpBuilderCheck.cpp | 2 +- .../llvm/UseNewMLIROpBuilderCheck.h | 2 +- .../clang-tidy/llvm/UseRangesCheck.cpp | 2 +- .../clang-tidy/llvm/UseRangesCheck.h | 2 +- .../llvmlibc/CalleeNamespaceCheck.cpp | 2 +- .../llvmlibc/CalleeNamespaceCheck.h | 2 +- .../ImplementationInNamespaceCheck.cpp | 2 +- .../llvmlibc/ImplementationInNamespaceCheck.h | 2 +- .../llvmlibc/InlineFunctionDeclCheck.cpp | 2 +- .../llvmlibc/InlineFunctionDeclCheck.h | 2 +- .../llvmlibc/LLVMLibcTidyModule.cpp | 2 +- .../clang-tidy/llvmlibc/NamespaceConstants.h | 2 +- .../RestrictSystemLibcHeadersCheck.cpp | 2 +- .../llvmlibc/RestrictSystemLibcHeadersCheck.h | 2 +- .../misc/ConfusableIdentifierCheck.cpp | 2 +- .../misc/ConfusableIdentifierCheck.h | 2 +- .../ConfusableTable/BuildConfusableTable.cpp | 2 +- .../clang-tidy/misc/ConstCorrectnessCheck.cpp | 2 +- .../clang-tidy/misc/ConstCorrectnessCheck.h | 2 +- .../misc/CoroutineHostileRAIICheck.cpp | 2 +- .../misc/CoroutineHostileRAIICheck.h | 2 +- .../misc/DefinitionsInHeadersCheck.cpp | 2 +- .../misc/DefinitionsInHeadersCheck.h | 2 +- .../misc/HeaderIncludeCycleCheck.cpp | 2 +- .../clang-tidy/misc/HeaderIncludeCycleCheck.h | 2 +- .../clang-tidy/misc/IncludeCleanerCheck.cpp | 2 +- .../clang-tidy/misc/IncludeCleanerCheck.h | 2 +- .../clang-tidy/misc/MiscTidyModule.cpp 
| 2 +- .../misc/MisleadingBidirectional.cpp | 2 +- .../clang-tidy/misc/MisleadingBidirectional.h | 2 +- .../clang-tidy/misc/MisleadingIdentifier.cpp | 2 +- .../clang-tidy/misc/MisleadingIdentifier.h | 2 +- .../clang-tidy/misc/MisplacedConstCheck.cpp | 2 +- .../clang-tidy/misc/MisplacedConstCheck.h | 2 +- .../misc/NewDeleteOverloadsCheck.cpp | 2 +- .../clang-tidy/misc/NewDeleteOverloadsCheck.h | 2 +- .../clang-tidy/misc/NoRecursionCheck.cpp | 2 +- .../clang-tidy/misc/NoRecursionCheck.h | 2 +- .../clang-tidy/misc/NonCopyableObjects.cpp | 2 +- .../clang-tidy/misc/NonCopyableObjects.h | 2 +- ...onPrivateMemberVariablesInClassesCheck.cpp | 2 +- .../NonPrivateMemberVariablesInClassesCheck.h | 2 +- .../OverrideWithDifferentVisibilityCheck.cpp | 2 +- .../OverrideWithDifferentVisibilityCheck.h | 2 +- .../misc/RedundantExpressionCheck.cpp | 2 +- .../misc/RedundantExpressionCheck.h | 2 +- .../clang-tidy/misc/StaticAssertCheck.cpp | 2 +- .../clang-tidy/misc/StaticAssertCheck.h | 2 +- .../ThrowByValueCatchByReferenceCheck.cpp | 2 +- .../misc/ThrowByValueCatchByReferenceCheck.h | 2 +- .../UnconventionalAssignOperatorCheck.cpp | 2 +- .../misc/UnconventionalAssignOperatorCheck.h | 2 +- .../misc/UniqueptrResetReleaseCheck.cpp | 2 +- .../misc/UniqueptrResetReleaseCheck.h | 2 +- .../clang-tidy/misc/UnusedAliasDeclsCheck.cpp | 2 +- .../clang-tidy/misc/UnusedAliasDeclsCheck.h | 2 +- .../clang-tidy/misc/UnusedParametersCheck.cpp | 2 +- .../clang-tidy/misc/UnusedParametersCheck.h | 2 +- .../clang-tidy/misc/UnusedUsingDeclsCheck.cpp | 2 +- .../clang-tidy/misc/UnusedUsingDeclsCheck.h | 2 +- .../misc/UseAnonymousNamespaceCheck.cpp | 2 +- .../misc/UseAnonymousNamespaceCheck.h | 2 +- .../misc/UseInternalLinkageCheck.cpp | 2 +- .../clang-tidy/misc/UseInternalLinkageCheck.h | 2 +- .../clang-tidy/modernize/AvoidBindCheck.cpp | 2 +- .../clang-tidy/modernize/AvoidBindCheck.h | 2 +- .../modernize/AvoidCArraysCheck.cpp | 2 +- .../clang-tidy/modernize/AvoidCArraysCheck.h | 2 +- 
.../modernize/ConcatNestedNamespacesCheck.cpp | 2 +- .../modernize/ConcatNestedNamespacesCheck.h | 2 +- .../modernize/DeprecatedHeadersCheck.cpp | 2 +- .../modernize/DeprecatedHeadersCheck.h | 2 +- .../DeprecatedIosBaseAliasesCheck.cpp | 2 +- .../modernize/DeprecatedIosBaseAliasesCheck.h | 2 +- .../IntegralLiteralExpressionMatcher.cpp | 2 +- .../IntegralLiteralExpressionMatcher.h | 2 +- .../clang-tidy/modernize/LoopConvertCheck.cpp | 2 +- .../clang-tidy/modernize/LoopConvertCheck.h | 2 +- .../clang-tidy/modernize/LoopConvertUtils.cpp | 2 +- .../clang-tidy/modernize/LoopConvertUtils.h | 2 +- .../clang-tidy/modernize/MacroToEnumCheck.cpp | 2 +- .../clang-tidy/modernize/MacroToEnumCheck.h | 2 +- .../clang-tidy/modernize/MakeSharedCheck.cpp | 2 +- .../clang-tidy/modernize/MakeSharedCheck.h | 2 +- .../modernize/MakeSmartPtrCheck.cpp | 2 +- .../clang-tidy/modernize/MakeSmartPtrCheck.h | 2 +- .../clang-tidy/modernize/MakeUniqueCheck.cpp | 2 +- .../clang-tidy/modernize/MakeUniqueCheck.h | 2 +- .../MinMaxUseInitializerListCheck.cpp | 2 +- .../modernize/MinMaxUseInitializerListCheck.h | 2 +- .../modernize/ModernizeTidyModule.cpp | 2 +- .../clang-tidy/modernize/PassByValueCheck.cpp | 2 +- .../clang-tidy/modernize/PassByValueCheck.h | 2 +- .../modernize/RawStringLiteralCheck.cpp | 2 +- .../modernize/RawStringLiteralCheck.h | 2 +- .../modernize/RedundantVoidArgCheck.h | 2 +- .../modernize/ReplaceAutoPtrCheck.cpp | 2 +- .../modernize/ReplaceAutoPtrCheck.h | 2 +- ...ReplaceDisallowCopyAndAssignMacroCheck.cpp | 2 +- .../ReplaceDisallowCopyAndAssignMacroCheck.h | 2 +- .../modernize/ReplaceRandomShuffleCheck.cpp | 2 +- .../modernize/ReplaceRandomShuffleCheck.h | 2 +- .../modernize/ReturnBracedInitListCheck.cpp | 2 +- .../modernize/ReturnBracedInitListCheck.h | 2 +- .../clang-tidy/modernize/ShrinkToFitCheck.cpp | 2 +- .../clang-tidy/modernize/ShrinkToFitCheck.h | 2 +- .../clang-tidy/modernize/TypeTraitsCheck.cpp | 2 +- .../clang-tidy/modernize/TypeTraitsCheck.h | 2 +- 
.../modernize/UnaryStaticAssertCheck.cpp | 2 +- .../modernize/UnaryStaticAssertCheck.h | 2 +- .../clang-tidy/modernize/UseAutoCheck.cpp | 2 +- .../clang-tidy/modernize/UseAutoCheck.h | 2 +- .../modernize/UseBoolLiteralsCheck.cpp | 2 +- .../modernize/UseBoolLiteralsCheck.h | 2 +- .../modernize/UseConstraintsCheck.cpp | 2 +- .../modernize/UseConstraintsCheck.h | 2 +- .../modernize/UseDefaultMemberInitCheck.cpp | 2 +- .../modernize/UseDefaultMemberInitCheck.h | 2 +- .../UseDesignatedInitializersCheck.cpp | 2 +- .../UseDesignatedInitializersCheck.h | 2 +- .../clang-tidy/modernize/UseEmplaceCheck.cpp | 2 +- .../clang-tidy/modernize/UseEmplaceCheck.h | 2 +- .../modernize/UseEqualsDefaultCheck.cpp | 2 +- .../modernize/UseEqualsDefaultCheck.h | 2 +- .../modernize/UseEqualsDeleteCheck.cpp | 2 +- .../modernize/UseEqualsDeleteCheck.h | 2 +- .../UseIntegerSignComparisonCheck.cpp | 2 +- .../modernize/UseIntegerSignComparisonCheck.h | 2 +- .../modernize/UseNodiscardCheck.cpp | 2 +- .../clang-tidy/modernize/UseNodiscardCheck.h | 2 +- .../clang-tidy/modernize/UseNoexceptCheck.cpp | 2 +- .../clang-tidy/modernize/UseNoexceptCheck.h | 2 +- .../clang-tidy/modernize/UseNullptrCheck.cpp | 2 +- .../clang-tidy/modernize/UseNullptrCheck.h | 2 +- .../clang-tidy/modernize/UseOverrideCheck.cpp | 2 +- .../clang-tidy/modernize/UseOverrideCheck.h | 2 +- .../clang-tidy/modernize/UseRangesCheck.cpp | 2 +- .../clang-tidy/modernize/UseRangesCheck.h | 2 +- .../modernize/UseScopedLockCheck.cpp | 2 +- .../clang-tidy/modernize/UseScopedLockCheck.h | 2 +- .../modernize/UseStartsEndsWithCheck.cpp | 2 +- .../modernize/UseStartsEndsWithCheck.h | 2 +- .../modernize/UseStdFormatCheck.cpp | 2 +- .../clang-tidy/modernize/UseStdFormatCheck.h | 2 +- .../modernize/UseStdNumbersCheck.cpp | 2 +- .../clang-tidy/modernize/UseStdNumbersCheck.h | 2 +- .../clang-tidy/modernize/UseStdPrintCheck.cpp | 2 +- .../clang-tidy/modernize/UseStdPrintCheck.h | 2 +- .../modernize/UseTrailingReturnTypeCheck.cpp | 2 +- 
.../modernize/UseTrailingReturnTypeCheck.h | 2 +- .../modernize/UseTransparentFunctorsCheck.cpp | 2 +- .../modernize/UseTransparentFunctorsCheck.h | 2 +- .../modernize/UseUncaughtExceptionsCheck.cpp | 2 +- .../modernize/UseUncaughtExceptionsCheck.h | 2 +- .../clang-tidy/modernize/UseUsingCheck.cpp | 2 +- .../clang-tidy/modernize/UseUsingCheck.h | 2 +- .../clang-tidy/mpi/BufferDerefCheck.cpp | 2 +- .../clang-tidy/mpi/BufferDerefCheck.h | 2 +- .../clang-tidy/mpi/MPITidyModule.cpp | 2 +- .../clang-tidy/mpi/TypeMismatchCheck.cpp | 2 +- .../clang-tidy/mpi/TypeMismatchCheck.h | 2 +- .../clang-tidy/objc/AssertEquals.cpp | 2 +- .../clang-tidy/objc/AssertEquals.h | 2 +- .../clang-tidy/objc/AvoidNSErrorInitCheck.cpp | 2 +- .../clang-tidy/objc/AvoidNSErrorInitCheck.h | 2 +- .../objc/DeallocInCategoryCheck.cpp | 2 +- .../clang-tidy/objc/DeallocInCategoryCheck.h | 2 +- .../objc/ForbiddenSubclassingCheck.cpp | 2 +- .../objc/ForbiddenSubclassingCheck.h | 2 +- .../clang-tidy/objc/MissingHashCheck.cpp | 2 +- .../clang-tidy/objc/MissingHashCheck.h | 2 +- .../clang-tidy/objc/NSDateFormatterCheck.cpp | 2 +- .../clang-tidy/objc/NSDateFormatterCheck.h | 2 +- .../NSInvocationArgumentLifetimeCheck.cpp | 2 +- .../objc/NSInvocationArgumentLifetimeCheck.h | 2 +- .../clang-tidy/objc/ObjCTidyModule.cpp | 2 +- .../objc/PropertyDeclarationCheck.cpp | 2 +- .../objc/PropertyDeclarationCheck.h | 2 +- .../clang-tidy/objc/SuperSelfCheck.cpp | 2 +- .../clang-tidy/objc/SuperSelfCheck.h | 2 +- .../openmp/ExceptionEscapeCheck.cpp | 2 +- .../clang-tidy/openmp/ExceptionEscapeCheck.h | 2 +- .../clang-tidy/openmp/OpenMPTidyModule.cpp | 2 +- .../clang-tidy/openmp/UseDefaultNoneCheck.cpp | 2 +- .../clang-tidy/openmp/UseDefaultNoneCheck.h | 2 +- .../clang-tidy/performance/AvoidEndlCheck.cpp | 2 +- .../clang-tidy/performance/AvoidEndlCheck.h | 2 +- .../clang-tidy/performance/EnumSizeCheck.cpp | 2 +- .../clang-tidy/performance/EnumSizeCheck.h | 2 +- .../performance/FasterStringFindCheck.cpp | 2 +- 
.../performance/FasterStringFindCheck.h | 2 +- .../performance/ForRangeCopyCheck.cpp | 2 +- .../performance/ForRangeCopyCheck.h | 2 +- .../ImplicitConversionInLoopCheck.cpp | 2 +- .../ImplicitConversionInLoopCheck.h | 2 +- .../performance/InefficientAlgorithmCheck.cpp | 2 +- .../performance/InefficientAlgorithmCheck.h | 2 +- .../InefficientStringConcatenationCheck.cpp | 2 +- .../InefficientStringConcatenationCheck.h | 3 +- .../InefficientVectorOperationCheck.cpp | 2 +- .../InefficientVectorOperationCheck.h | 2 +- .../performance/MoveConstArgCheck.cpp | 2 +- .../performance/MoveConstArgCheck.h | 2 +- .../performance/MoveConstructorInitCheck.cpp | 2 +- .../performance/MoveConstructorInitCheck.h | 2 +- .../performance/NoAutomaticMoveCheck.cpp | 2 +- .../performance/NoAutomaticMoveCheck.h | 2 +- .../performance/NoIntToPtrCheck.cpp | 2 +- .../clang-tidy/performance/NoIntToPtrCheck.h | 2 +- .../performance/NoexceptDestructorCheck.cpp | 2 +- .../performance/NoexceptDestructorCheck.h | 2 +- .../performance/NoexceptFunctionBaseCheck.cpp | 2 +- .../performance/NoexceptFunctionBaseCheck.h | 2 +- .../NoexceptMoveConstructorCheck.cpp | 2 +- .../NoexceptMoveConstructorCheck.h | 2 +- .../performance/NoexceptSwapCheck.cpp | 2 +- .../performance/NoexceptSwapCheck.h | 2 +- .../performance/PerformanceTidyModule.cpp | 2 +- .../TriviallyDestructibleCheck.cpp | 2 +- .../performance/TriviallyDestructibleCheck.h | 2 +- .../TypePromotionInMathFnCheck.cpp | 2 +- .../performance/TypePromotionInMathFnCheck.h | 2 +- .../UnnecessaryCopyInitialization.cpp | 2 +- .../UnnecessaryCopyInitialization.h | 2 +- .../UnnecessaryValueParamCheck.cpp | 2 +- .../performance/UnnecessaryValueParamCheck.h | 2 +- .../portability/AvoidPragmaOnceCheck.cpp | 2 +- .../portability/AvoidPragmaOnceCheck.h | 2 +- .../portability/PortabilityTidyModule.cpp | 2 +- .../RestrictSystemIncludesCheck.cpp | 2 +- .../portability/RestrictSystemIncludesCheck.h | 2 +- .../portability/SIMDIntrinsicsCheck.cpp | 2 +- 
.../portability/SIMDIntrinsicsCheck.h | 2 +- .../portability/StdAllocatorConstCheck.cpp | 2 +- .../portability/StdAllocatorConstCheck.h | 2 +- .../TemplateVirtualMemberFunctionCheck.cpp | 2 +- .../TemplateVirtualMemberFunctionCheck.h | 2 +- .../AmbiguousSmartptrResetCallCheck.cpp | 2 +- .../AmbiguousSmartptrResetCallCheck.h | 2 +- .../readability/AvoidConstParamsInDecls.cpp | 2 +- .../readability/AvoidConstParamsInDecls.h | 2 +- .../AvoidNestedConditionalOperatorCheck.cpp | 2 +- .../AvoidNestedConditionalOperatorCheck.h | 2 +- .../AvoidReturnWithVoidValueCheck.cpp | 2 +- .../AvoidReturnWithVoidValueCheck.h | 2 +- .../AvoidUnconditionalPreprocessorIfCheck.cpp | 2 +- .../AvoidUnconditionalPreprocessorIfCheck.h | 2 +- .../BracesAroundStatementsCheck.cpp | 2 +- .../readability/BracesAroundStatementsCheck.h | 2 +- .../readability/ConstReturnTypeCheck.cpp | 2 +- .../readability/ConstReturnTypeCheck.h | 2 +- .../readability/ContainerContainsCheck.cpp | 2 +- .../readability/ContainerContainsCheck.h | 2 +- .../readability/ContainerDataPointerCheck.cpp | 2 +- .../readability/ContainerDataPointerCheck.h | 2 +- .../readability/ContainerSizeEmptyCheck.cpp | 2 +- .../readability/ContainerSizeEmptyCheck.h | 2 +- .../ConvertMemberFunctionsToStatic.cpp | 2 +- .../ConvertMemberFunctionsToStatic.h | 2 +- .../readability/DeleteNullPointerCheck.cpp | 2 +- .../readability/DeleteNullPointerCheck.h | 2 +- .../readability/DuplicateIncludeCheck.cpp | 2 +- .../readability/DuplicateIncludeCheck.h | 2 +- .../readability/ElseAfterReturnCheck.cpp | 2 +- .../readability/ElseAfterReturnCheck.h | 2 +- .../readability/EnumInitialValueCheck.cpp | 2 +- .../readability/EnumInitialValueCheck.h | 2 +- .../FunctionCognitiveComplexityCheck.cpp | 2 +- .../FunctionCognitiveComplexityCheck.h | 2 +- .../readability/FunctionSizeCheck.cpp | 2 +- .../readability/FunctionSizeCheck.h | 2 +- .../readability/IdentifierLengthCheck.cpp | 3 +- .../readability/IdentifierLengthCheck.h | 3 +- 
.../readability/IdentifierNamingCheck.cpp | 2 +- .../readability/IdentifierNamingCheck.h | 2 +- .../ImplicitBoolConversionCheck.cpp | 2 +- .../readability/ImplicitBoolConversionCheck.h | 2 +- ...onsistentDeclarationParameterNameCheck.cpp | 2 +- .../readability/IsolateDeclarationCheck.cpp | 2 +- .../readability/IsolateDeclarationCheck.h | 2 +- .../readability/MagicNumbersCheck.cpp | 2 +- .../readability/MagicNumbersCheck.h | 2 +- .../MakeMemberFunctionConstCheck.cpp | 2 +- .../MakeMemberFunctionConstCheck.h | 2 +- .../MathMissingParenthesesCheck.cpp | 2 +- .../readability/MathMissingParenthesesCheck.h | 2 +- .../MisleadingIndentationCheck.cpp | 2 +- .../readability/MisleadingIndentationCheck.h | 2 +- .../readability/MisplacedArrayIndexCheck.cpp | 2 +- .../readability/MisplacedArrayIndexCheck.h | 2 +- .../readability/NamedParameterCheck.cpp | 2 +- .../readability/NamedParameterCheck.h | 2 +- .../readability/NamespaceCommentCheck.cpp | 2 +- .../readability/NamespaceCommentCheck.h | 2 +- .../readability/NonConstParameterCheck.cpp | 2 +- .../readability/NonConstParameterCheck.h | 2 +- .../OperatorsRepresentationCheck.cpp | 3 +- .../OperatorsRepresentationCheck.h | 2 +- .../readability/QualifiedAutoCheck.cpp | 2 +- .../readability/QualifiedAutoCheck.h | 2 +- .../readability/ReadabilityTidyModule.cpp | 2 +- .../RedundantAccessSpecifiersCheck.cpp | 2 +- .../RedundantAccessSpecifiersCheck.h | 2 +- .../readability/RedundantCastingCheck.cpp | 2 +- .../readability/RedundantCastingCheck.h | 2 +- .../readability/RedundantControlFlowCheck.cpp | 2 +- .../readability/RedundantControlFlowCheck.h | 2 +- .../readability/RedundantDeclarationCheck.cpp | 2 +- .../readability/RedundantDeclarationCheck.h | 2 +- .../RedundantFunctionPtrDereferenceCheck.cpp | 2 +- .../RedundantFunctionPtrDereferenceCheck.h | 2 +- .../RedundantInlineSpecifierCheck.cpp | 2 +- .../RedundantInlineSpecifierCheck.h | 2 +- .../readability/RedundantMemberInitCheck.cpp | 2 +- 
.../readability/RedundantMemberInitCheck.h | 2 +- .../RedundantPreprocessorCheck.cpp | 2 +- .../readability/RedundantPreprocessorCheck.h | 2 +- .../readability/RedundantSmartptrGetCheck.cpp | 2 +- .../readability/RedundantSmartptrGetCheck.h | 2 +- .../readability/RedundantStringCStrCheck.h | 2 +- .../ReferenceToConstructedTemporaryCheck.cpp | 3 +- .../ReferenceToConstructedTemporaryCheck.h | 2 +- .../readability/SimplifyBooleanExprCheck.cpp | 2 +- .../readability/SimplifyBooleanExprCheck.h | 2 +- .../SimplifySubscriptExprCheck.cpp | 2 +- .../readability/SimplifySubscriptExprCheck.h | 2 +- .../StaticAccessedThroughInstanceCheck.cpp | 2 +- .../StaticAccessedThroughInstanceCheck.h | 2 +- ...ticDefinitionInAnonymousNamespaceCheck.cpp | 2 +- ...taticDefinitionInAnonymousNamespaceCheck.h | 2 +- .../readability/StringCompareCheck.cpp | 2 +- .../readability/StringCompareCheck.h | 2 +- .../SuspiciousCallArgumentCheck.cpp | 2 +- .../readability/SuspiciousCallArgumentCheck.h | 2 +- .../UniqueptrDeleteReleaseCheck.cpp | 2 +- .../readability/UniqueptrDeleteReleaseCheck.h | 2 +- .../UppercaseLiteralSuffixCheck.cpp | 2 +- .../readability/UppercaseLiteralSuffixCheck.h | 2 +- .../readability/UseAnyOfAllOfCheck.cpp | 2 +- .../readability/UseAnyOfAllOfCheck.h | 2 +- .../UseConcisePreprocessorDirectivesCheck.cpp | 2 +- .../UseConcisePreprocessorDirectivesCheck.h | 2 +- .../readability/UseStdMinMaxCheck.cpp | 2 +- .../readability/UseStdMinMaxCheck.h | 2 +- clang-tools-extra/clang-tidy/rename_check.py | 36 +------------------ .../clang-tidy/tool/ClangTidyMain.cpp | 2 +- .../clang-tidy/tool/ClangTidyMain.h | 2 +- .../clang-tidy/tool/ClangTidyToolMain.cpp | 2 +- .../clang-tidy/tool/clang-tidy-diff.py | 2 +- .../clang-tidy/tool/run-clang-tidy.py | 2 +- .../clang-tidy/utils/ASTUtils.cpp | 2 +- clang-tools-extra/clang-tidy/utils/ASTUtils.h | 2 +- .../clang-tidy/utils/Aliasing.cpp | 2 +- clang-tools-extra/clang-tidy/utils/Aliasing.h | 2 +- .../utils/BracesAroundStatement.cpp | 2 +- 
.../clang-tidy/utils/BracesAroundStatement.h | 2 +- .../clang-tidy/utils/DeclRefExprUtils.cpp | 2 +- .../clang-tidy/utils/DeclRefExprUtils.h | 2 +- .../utils/DesignatedInitializers.cpp | 2 +- .../clang-tidy/utils/DesignatedInitializers.h | 2 +- .../clang-tidy/utils/ExceptionAnalyzer.cpp | 2 +- .../clang-tidy/utils/ExceptionAnalyzer.h | 2 +- .../utils/ExceptionSpecAnalyzer.cpp | 2 +- .../clang-tidy/utils/ExceptionSpecAnalyzer.h | 2 +- .../clang-tidy/utils/ExprSequence.cpp | 2 +- .../clang-tidy/utils/ExprSequence.h | 2 +- .../clang-tidy/utils/FileExtensionsUtils.cpp | 2 +- .../clang-tidy/utils/FileExtensionsUtils.h | 2 +- .../clang-tidy/utils/FixItHintUtils.cpp | 2 +- .../clang-tidy/utils/FixItHintUtils.h | 2 +- .../utils/FormatStringConverter.cpp | 2 +- .../clang-tidy/utils/FormatStringConverter.h | 2 +- .../clang-tidy/utils/HeaderGuard.cpp | 2 +- .../clang-tidy/utils/HeaderGuard.h | 2 +- .../clang-tidy/utils/IncludeInserter.cpp | 2 +- .../clang-tidy/utils/IncludeInserter.h | 2 +- .../clang-tidy/utils/IncludeSorter.cpp | 2 +- .../clang-tidy/utils/IncludeSorter.h | 2 +- .../clang-tidy/utils/LexerUtils.cpp | 2 +- .../clang-tidy/utils/LexerUtils.h | 2 +- .../clang-tidy/utils/Matchers.cpp | 2 +- clang-tools-extra/clang-tidy/utils/Matchers.h | 2 +- .../clang-tidy/utils/NamespaceAliaser.cpp | 2 +- .../clang-tidy/utils/NamespaceAliaser.h | 2 +- .../clang-tidy/utils/OptionsUtils.cpp | 2 +- .../clang-tidy/utils/OptionsUtils.h | 2 +- .../utils/RenamerClangTidyCheck.cpp | 2 +- .../clang-tidy/utils/RenamerClangTidyCheck.h | 2 +- .../utils/TransformerClangTidyCheck.cpp | 2 +- .../utils/TransformerClangTidyCheck.h | 2 +- .../clang-tidy/utils/TypeTraits.cpp | 2 +- .../clang-tidy/utils/TypeTraits.h | 2 +- .../clang-tidy/utils/UseRangesCheck.cpp | 2 +- .../clang-tidy/utils/UseRangesCheck.h | 2 +- .../clang-tidy/utils/UsingInserter.cpp | 2 +- .../clang-tidy/utils/UsingInserter.h | 2 +- .../zircon/TemporaryObjectsCheck.cpp | 2 +- .../clang-tidy/zircon/TemporaryObjectsCheck.h | 2 +- 
.../clang-tidy/zircon/ZirconTidyModule.cpp | 2 +- .../test/clang-tidy/check_clang_tidy.py | 2 +- 867 files changed, 867 insertions(+), 909 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 2064c7826da0c..4c36bbccf44d9 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -1,4 +1,4 @@ -//===--- tools/extra/clang-tidy/ClangTidy.cpp - Clang tidy tool -----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidy.h b/clang-tools-extra/clang-tidy/ClangTidy.h index d37d68ec0a5b9..3d1d3ca0b1791 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.h +++ b/clang-tools-extra/clang-tidy/ClangTidy.h @@ -1,4 +1,4 @@ -//===--- ClangTidy.h - clang-tidy -------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp index 88abcb6946779..d36cc3e6e23db 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp @@ -1,4 +1,4 @@ -//===--- ClangTidyCheck.cpp - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.h b/clang-tools-extra/clang-tidy/ClangTidyCheck.h index 399d45911549d..e53ae532d7e5f 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.h @@ -1,4 +1,4 @@ -//===--- ClangTidyCheck.h - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp index fac6e0418d163..d07f15a10555f 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp @@ -1,4 +1,4 @@ -//===--- tools/extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp ----------=== // +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h index 6e7cb7bb10e57..a854756d647c2 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h @@ -1,4 +1,4 @@ -//===--- ClangTidyDiagnosticConsumer.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/ClangTidyModule.cpp b/clang-tools-extra/clang-tidy/ClangTidyModule.cpp index 7432229fda800..4fb4144f835a3 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- tools/extra/clang-tidy/ClangTidyModule.cpp - Clang tidy tool -----===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidyModule.h b/clang-tools-extra/clang-tidy/ClangTidyModule.h index 28f54331755a7..7407ab580d378 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyModule.h +++ b/clang-tools-extra/clang-tidy/ClangTidyModule.h @@ -1,4 +1,4 @@ -//===--- ClangTidyModule.h - clang-tidy -------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidyModuleRegistry.h b/clang-tools-extra/clang-tidy/ClangTidyModuleRegistry.h index 8a07b05c26446..e0e5e35d4dae0 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyModuleRegistry.h +++ b/clang-tools-extra/clang-tidy/ClangTidyModuleRegistry.h @@ -1,4 +1,4 @@ -//===--- ClangTidyModuleRegistry.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp index e59f157b468bc..c697a7a3b00bd 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp @@ -1,4 +1,4 @@ -//===--- ClangTidyOptions.cpp - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidyOptions.h b/clang-tools-extra/clang-tidy/ClangTidyOptions.h index 6ddc5f9b9cf9e..22a954d2ac645 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyOptions.h +++ b/clang-tools-extra/clang-tidy/ClangTidyOptions.h @@ -1,4 +1,4 @@ -//===--- ClangTidyOptions.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp b/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp index 89867ec30f51f..8ea6b76819804 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp @@ -1,4 +1,4 @@ -//===--- ClangTidyProfiling.cpp - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/ClangTidyProfiling.h b/clang-tools-extra/clang-tidy/ClangTidyProfiling.h index 76deede1716f4..59c213b181ef7 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyProfiling.h +++ b/clang-tools-extra/clang-tidy/ClangTidyProfiling.h @@ -1,4 +1,4 @@ -//===--- ClangTidyProfiling.h - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/FileExtensionsSet.h b/clang-tools-extra/clang-tidy/FileExtensionsSet.h index 7ca4e6ee01d3f..95c221c84da2e 100644 --- a/clang-tools-extra/clang-tidy/FileExtensionsSet.h +++ b/clang-tools-extra/clang-tidy/FileExtensionsSet.h @@ -1,4 +1,4 @@ -//===--- FileExtensionsSet.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/GlobList.cpp b/clang-tools-extra/clang-tidy/GlobList.cpp index 8f09ee075bbd6..667a25657a4c9 100644 --- a/clang-tools-extra/clang-tidy/GlobList.cpp +++ b/clang-tools-extra/clang-tidy/GlobList.cpp @@ -1,4 +1,4 @@ -//===--- tools/extra/clang-tidy/GlobList.cpp ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/GlobList.h b/clang-tools-extra/clang-tidy/GlobList.h index 4317928270adf..c9086df2b7973 100644 --- a/clang-tools-extra/clang-tidy/GlobList.h +++ b/clang-tools-extra/clang-tidy/GlobList.h @@ -1,4 +1,4 @@ -//===--- GlobList.h ---------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp index bbae2c171f790..ef20ee18347df 100644 --- a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp +++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp @@ -1,4 +1,4 @@ -//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp -----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h index e862195abaabb..e33d0f2781886 100644 --- a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h +++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h @@ -1,4 +1,4 @@ -//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h ----*- C++ *-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp b/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp index 78605d59b4421..8971530bab9b2 100644 --- a/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp @@ -1,4 +1,4 @@ -//===------- AbseilTidyModule.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp index 8063fc540cce5..dd20ad8a4c269 100644 --- a/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp @@ -1,4 +1,4 @@ -//===--- CleanupCtadCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h index 5e2350e071bdf..414085146bfe4 100644 --- a/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h @@ -1,4 +1,4 @@ -//===--- CleanupCtadCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.cpp index d4b8c9e6d8942..4e1bd3ae32ee5 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.cpp @@ -1,4 +1,4 @@ -//===--- DurationAdditionCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h index ac71f34fed180..e740326a3d6de 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h @@ -1,4 +1,4 @@ -//===--- DurationAdditionCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.cpp index 3baacb36c3f7a..cb8a478e288b6 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.cpp @@ -1,4 +1,4 @@ -//===--- DurationComparisonCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.h index 65ab7a38eb289..d9fc8cb165235 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.h @@ -1,4 +1,4 @@ -//===--- DurationComparisonCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.cpp index 869a0ec44556c..cf591d9589057 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.cpp @@ -1,4 +1,4 @@ -//===--- DurationConversionCastCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.h index a898ba0483966..cd45bc078fde6 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.h @@ -1,4 +1,4 @@ -//===--- DurationConversionCastCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.cpp index 50e2d0366c768..b23d86c456c51 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.cpp @@ -1,4 +1,4 @@ -//===--- DurationDivisionCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h index c8f259521b648..810f7d269f38f 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h @@ -1,4 +1,4 @@ -//===--- DurationDivisionCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.cpp index 398f1691dca39..cccd7cf796150 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.cpp @@ -1,4 +1,4 @@ -//===--- DurationFactoryFloatCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.h index e7c3985a7fd92..1d688da43e268 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.h @@ -1,4 +1,4 @@ -//===--- DurationFactoryFloatCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp index 121892fd0daa9..1d6ff1ab17abd 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp @@ -1,4 +1,4 @@ -//===--- DurationFactoryScaleCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.h index f5f088c49897d..1d53d13fa9f9a 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.h @@ -1,4 +1,4 @@ -//===--- DurationFactoryScaleCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp index 4cdbbd43c1431..e57073e500ccc 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp @@ -1,4 +1,4 @@ -//===--- DurationRewriter.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h index dc05b3fe3b55a..27d6ca0616985 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h @@ -1,4 +1,4 @@ -//===--- DurationRewriter.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.cpp index 48600298a20ca..fd5e2038f75d1 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.cpp @@ -1,4 +1,4 @@ -//===--- DurationSubtractionCheck.cpp - clang-tidy ------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.h index c865f2f842a0d..b092561df909c 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.h @@ -1,4 +1,4 @@ -//===--- DurationSubtractionCheck.h - clang-tidy ----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.cpp index 9bb1fd57a4401..805d7dacd4eec 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.cpp @@ -1,5 +1,4 @@ -//===--- DurationUnnecessaryConversionCheck.cpp - clang-tidy -//-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h index fc9cf23459425..a5bd4dca6ce1f 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h @@ -1,4 +1,4 @@ -//===--- DurationUnnecessaryConversionCheck.h - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp b/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp index 4a6f17ed5f868..13d566087688f 100644 --- a/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp @@ -1,4 +1,4 @@ -//===--- FasterStrsplitDelimiterCheck.cpp - clang-tidy---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.h b/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.h index b6caacd505c31..96e261d86697b 100644 --- a/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.h @@ -1,4 +1,4 @@ -//===--- FasterStrsplitDelimiterCheck.h - clang-tidy-------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.cpp b/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.cpp index 19409d0616f06..c090e5ac54222 100644 --- a/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoInternalDependenciesCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h b/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h index 9c16524ff6b59..7b46ba55e008f 100644 --- a/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h @@ -1,4 +1,4 @@ -//===--- NoInternalDependenciesCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.cpp index 6c2baa4c41412..74facceddac8b 100644 --- a/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoNamespaceCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.h b/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.h index be56adad03bcf..d3ab5cc5219ef 100644 --- a/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.h @@ -1,4 +1,4 @@ -//===--- NoNamespaceCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.cpp b/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.cpp index ef26a8a76cb37..d7cc0cacab6ea 100644 --- a/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantStrcatCallsCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.h b/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.h index a27899d3e821a..a5300a399c89d 100644 --- a/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/RedundantStrcatCallsCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantStrcatCallsCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.cpp index ced92590be02e..e088e286214b0 100644 --- a/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.cpp @@ -1,4 +1,4 @@ -//===--- StrCatAppendCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.h b/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.h index fcd9d4b6e1885..93245c01cebb4 100644 --- a/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/StrCatAppendCheck.h @@ -1,4 +1,4 @@ -//===--- StrCatAppendCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp index 221e924c10f62..92d63057caf65 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp @@ -1,4 +1,4 @@ -//===--- StringFindStartswithCheck.cc - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h index de3bd4d422200..0d0866db29346 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h @@ -1,4 +1,4 @@ -//===--- StringFindStartswithCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.cpp index 0c2fe285ce060..6eb559717077b 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.cpp @@ -1,4 +1,4 @@ -//===--- StringFindStrContainsCheck.cc - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.h b/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.h index 68b827c5de0e1..f939c0b5791e5 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.h @@ -1,4 +1,4 @@ -//===--- StringFindStrContainsCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.cpp b/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.cpp index 2bcd8064400f8..52121a57de0d1 100644 --- a/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.cpp @@ -1,5 +1,4 @@ -//===--- TimeComparisonCheck.cpp - clang-tidy -//--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h b/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h index bf22977e9d0df..bbf74bebd26ae 100644 --- a/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h @@ -1,4 +1,4 @@ -//===--- TimeComparisonCheck.h - clang-tidy ---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.cpp b/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.cpp index 2def393938e67..228d974cd5e23 100644 --- a/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.cpp @@ -1,4 +1,4 @@ -//===--- TimeSubtractionCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.h b/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.h index 9e2ec1c8def20..f8bb599d36d5d 100644 --- a/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.h @@ -1,4 +1,4 @@ -//===--- TimeSubtractionCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.cpp b/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.cpp index b2eddf67edb3f..f7905e081170e 100644 --- a/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UpgradeDurationConversionsCheck.cpp - clang-tidy -----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.h b/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.h index 8e7d9829533d4..e4865b941f2ac 100644 --- a/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.h +++ b/clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.h @@ -1,4 +1,4 @@ -//===--- UpgradeDurationConversionsCheck.h - clang-tidy ---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py index 2b51a1dc40ebc..0035da288dcf5 100755 --- a/clang-tools-extra/clang-tidy/add_new_check.py +++ b/clang-tools-extra/clang-tidy/add_new_check.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# ===- add_new_check.py - clang-tidy check generator ---------*- python -*--===# +# ===-----------------------------------------------------------------------===# # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp index 02a43ba86d7bb..28733ef1d994c 100644 --- a/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- AlteraTidyModule.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.cpp b/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.cpp index 94db0a793cf53..49ba17ce643fe 100644 --- a/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.cpp @@ -1,4 +1,4 @@ -//===--- IdDependentBackwardBranchCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.h b/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.h index cf964a2d5d6f6..0030faa3c5ec5 100644 --- a/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.h +++ b/clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.h @@ -1,4 +1,4 @@ -//===--- IdDependentBackwardBranchCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp index a94d6c8d7c4e6..4c740e31ae7be 100644 --- a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp @@ -1,4 +1,4 @@ -//===--- KernelNameRestrictionCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h index 02d82f4d6891a..bd8aafecf4f76 100644 --- a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h +++ b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h @@ -1,4 +1,4 @@ -//===--- KernelNameRestrictionCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp index c21b7cab1b8da..c9df658d9bd67 100644 --- a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp @@ -1,4 +1,4 @@ -//===--- SingleWorkItemBarrierCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h index 62a055b094645..5560f2765f9f9 100644 --- a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h +++ b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h @@ -1,4 +1,4 @@ -//===--- SingleWorkItemBarrierCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp index d7b8f7bc62409..0a19378949f46 100644 --- a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp @@ -1,4 +1,4 @@ -//===--- StructPackAlignCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h index f360bcef4f14e..f6f2d1fa529e5 100644 --- a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h @@ -1,4 +1,4 @@ -//===--- StructPackAlignCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp index 9846a573a8c29..6aad3c6b191ed 100644 --- a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnrollLoopsCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.h b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.h index 0d8306e8437b7..453176fa4894a 100644 --- a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.h +++ b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.h @@ -1,4 +1,4 @@ -//===--- UnrollLoopsCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/AndroidTidyModule.cpp b/clang-tools-extra/clang-tidy/android/AndroidTidyModule.cpp index 17efa10909d0a..40362531f2daf 100644 --- a/clang-tools-extra/clang-tidy/android/AndroidTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/android/AndroidTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- AndroidTidyModule.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.cpp b/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.cpp index 8c4bcc70b300e..8cf22ba2acb4a 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.cpp @@ -1,4 +1,4 @@ -//===--- CloexecAccept4Check.cpp - clang-tidy------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h b/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h index a34d12041cf3c..e7286dc519484 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h +++ b/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h @@ -1,4 +1,4 @@ -//===--- CloexecAccept4Check.h - clang-tidy----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.cpp index c90fc7ba1bb04..9cd888cca023b 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecAcceptCheck.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h b/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h index 013fa5fa1725e..9b982b2b104ca 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecAcceptCheck.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp index f4d657a7f4e90..cd83423adae05 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecCheck.cpp - clang-tidy-------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.h b/clang-tools-extra/clang-tidy/android/CloexecCheck.h index edbff70fbd33e..79f7ab3354d8d 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecCheck.h - clang-tidy-----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.cpp index 8d9f45c3567f2..ae44efb629893 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecCreatCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h b/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h index b1ab914d57d15..e0629f2ac4061 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecCreatCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecDupCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecDupCheck.cpp index 89191083c18c8..5ac1b6fb632e1 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecDupCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecDupCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecDupCheck.cpp - clang-tidy----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h b/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h index 9554b7cf4ed8f..3016867e56189 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecDupCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.cpp b/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.cpp index 01b771b2072c8..f3c26b48c432a 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.cpp @@ -1,4 +1,4 @@ -//===--- CloexecEpollCreate1Check.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h b/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h index b7912aba30962..cb0d40b8b9f36 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h +++ b/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h @@ -1,4 +1,4 @@ -//===--- CloexecEpollCreate1Check.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.cpp index fc02e542863bc..727f0bef662de 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecEpollCreateCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h b/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h index 1dbbcb1e98502..9010179bd7036 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecEpollCreateCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.cpp index bb9d0d2cb3da3..8ddd6a0523156 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecFopenCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h b/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h index 96517f55a5b55..1e0e7d76933c7 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecFopenCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.cpp b/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.cpp index 910793582d67c..c64ef82a35ad6 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.cpp @@ -1,4 +1,4 @@ -//===--- CloexecInotifyInit1Check.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h b/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h index c87f1fdd956e4..50bc4bbaa7de5 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h +++ b/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h @@ -1,4 +1,4 @@ -//===--- CloexecInotifyInit1Check.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.cpp index ed591799d0656..d3502205d1642 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecInotifyInitCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h b/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h index 1ef07de6cad54..7db4ab15c2f9e 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecInotifyInitCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.cpp index 4e53d607d81e3..5ecf908aabb59 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecMemfdCreateCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h b/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h index 1518d20fd4c5c..43a27dd5658a5 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecMemfdCreateCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.cpp index 623b6ab02e7ba..8c24482c73251 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecOpenCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h b/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h index 692d2a2319c1c..d95fe21fb3e88 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecOpenCheck.h - clang-tidy-----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.cpp b/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.cpp index e32332bdfc953..a024ea3431ddf 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.cpp @@ -1,4 +1,4 @@ -//===--- CloexecPipe2Check.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h b/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h index 68e5f4270ceb0..17d9b4f326e86 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h +++ b/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h @@ -1,4 +1,4 @@ -//===--- CloexecPipe2Check.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.cpp index c59b127dc87ac..a475dff4a2682 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecPipeCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h b/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h index b5ef892196b5a..47a202e8542eb 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecPipeCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.cpp index 12b31a050c2c0..4e9f4c33f0b83 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.cpp @@ -1,4 +1,4 @@ -//===--- CloexecSocketCheck.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h b/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h index c046337f1d229..8ef02c1f197b7 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h @@ -1,4 +1,4 @@ -//===--- CloexecSocketCheck.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp index 4c33e5db6d5b9..78e58bccaeba1 100644 --- a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp @@ -1,4 +1,4 @@ -//===--- ComparisonInTempFailureRetryCheck.cpp - clang-tidy----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h index 201c16fe70aa3..b7316e4c5f47a 100644 --- a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h +++ b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h @@ -1,4 +1,4 @@ -//===--- ComparisonInTempFailureRetryCheck.h - clang-tidy--------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp b/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp index f414fe750d023..c13a24401afba 100644 --- a/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp @@ -1,4 +1,4 @@ -//===------- BoostTidyModule.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp index e45687fde6d9f..34ecee5badb15 100644 --- a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp +++ b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h index d91e6393a0e85..107d801969fc4 100644 --- a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h +++ b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp b/clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp index bcd5def290599..3574108ee5697 100644 --- a/clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp +++ b/clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseToStringCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h b/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h index f62df83ed8e54..a245d11ee1c8a 100644 --- a/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h +++ b/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h @@ -1,4 +1,4 @@ -//===--- UseToStringCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp index 15e7b53ed5be0..c0a778a027377 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp @@ -1,4 +1,4 @@ -//===--- ArgumentCommentCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.h b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.h index 3d608df752c03..30fa32fad72e7 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.h @@ -1,4 +1,4 @@ -//===--- ArgumentCommentCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp index 0889a1a737189..227641d73885e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp @@ -1,4 +1,4 @@ -//===--- AssertSideEffectCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h index 5cd1132bbd839..b65e1a19e81ac 100644 --- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h @@ -1,4 +1,4 @@ -//===--- AssertSideEffectCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.cpp index e03cac6c5fd83..2c8856298e7be 100644 --- a/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.cpp @@ -1,4 +1,4 @@ -//===--- AssignmentInIfConditionCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.h b/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.h index 072b4dd79fe5d..3ae4f36913d5f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/AssignmentInIfConditionCheck.h @@ -1,4 +1,4 @@ -//===--- AssignmentInIfConditionCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp index 8c13ce5a90e9b..e1d0538ab1644 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp @@ -1,4 +1,4 @@ -//===--- BadSignalToKillThreadCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.h b/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.h index f21b8c09eb0c6..aa4d83c89a08d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.h @@ -1,4 +1,4 @@ -//===--- BadSignalToKillThreadCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.cpp index 0992e49b7f372..a9e7ae8734677 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.cpp @@ -1,4 +1,4 @@ -//===--- BitwisePointerCastCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.h b/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.h index 1515519b3c9fd..71dc159573619 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/BitwisePointerCastCheck.h @@ -1,4 +1,4 @@ -//===--- BitwisePointerCastCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp index 09b198d24dc7a..df8552436241e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp @@ -1,4 +1,4 @@ -//===--- BoolPointerImplicitConversionCheck.cpp - clang-tidy --------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.h b/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.h index ef62e3d8b0fb1..19dcdf5218c35 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.h @@ -1,4 +1,4 @@ -//===--- BoolPointerImplicitConversionCheck.h - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp index a6cd68edda55e..07bb08166a006 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp @@ -1,4 +1,4 @@ -//===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.h b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.h index 599da14c136fd..22dbb2384900c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.h @@ -1,4 +1,4 @@ -//===--- BranchCloneCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp index fe261e729539c..491de6acea2b7 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- BugproneTidyModule.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.cpp index f188ae5ec81b1..a376de505dd70 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.cpp @@ -1,4 +1,4 @@ -//===--- CapturingThisInMemberVariableCheck.cpp - clang-tidy --------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.h b/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.h index 934f99cd35797..6aba9ee84d2bd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/CapturingThisInMemberVariableCheck.h @@ -1,4 +1,4 @@ -//===--- CapturingThisInMemberVariableCheck.h - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp index f0a9ace229740..aaddf4bdd259e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.cpp @@ -1,4 +1,4 @@ -//===--- CastingThroughVoidCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.h b/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.h index 834676aaf0543..313f3f240f5b3 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/CastingThroughVoidCheck.h @@ -1,4 +1,4 @@ -//===--- CastingThroughVoidCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.cpp index 7378f1a24ffd3..6af535f712d71 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.cpp @@ -1,4 +1,4 @@ -//===--- ChainedComparisonCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.h b/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.h index a914149a42e69..bf8e3f709d30b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ChainedComparisonCheck.h @@ -1,4 +1,4 @@ -//===--- ChainedComparisonCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/ComparePointerToMemberVirtualFunctionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ComparePointerToMemberVirtualFunctionCheck.cpp index 1cbf1e22a33a7..602b63e43ad9e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ComparePointerToMemberVirtualFunctionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ComparePointerToMemberVirtualFunctionCheck.cpp @@ -1,4 +1,4 @@ -//===--- ComparePointerToMemberVirtualFunctionCheck.cpp - clang-tidy ------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.cpp index 6b26b46d032f9..76bcbbbcdf680 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- CopyConstructorInitCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h index 4aef892476c46..02755b5894b18 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h @@ -1,4 +1,4 @@ -//===--- CopyConstructorInitCheck.h - clang-tidy--------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp index 0625468d9da88..60f7be8996933 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp @@ -1,4 +1,4 @@ -//===--- CrtpConstructorAccessibilityCheck.cpp - clang-tidy ---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.h b/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.h index 785116218f468..c7d7c9f7c0e69 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.h @@ -1,4 +1,4 @@ -//===--- CrtpConstructorAccessibilityCheck.h - clang-tidy -------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp index e13b1ceacc539..5b741e8c35b9a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp @@ -1,4 +1,4 @@ -//===--- DanglingHandleCheck.cpp - clang-tidy------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h index 981e9b571a618..6443b0aa59548 100644 --- a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h @@ -1,4 +1,4 @@ -//===--- DanglingHandleCheck.h - clang-tidy----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.cpp index 3fe028b94771d..4d0428ec18598 100644 --- a/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.cpp @@ -1,4 +1,4 @@ -//===--- DynamicStaticInitializersCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h b/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h index 66ed2828502b6..e02c62a53ffa0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h @@ -1,4 +1,4 @@ -//===--- DynamicStaticInitializersCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index c426b32ccade3..d8207b30f1b5e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -1,4 +1,4 @@ -//===--- EasilySwappableParametersCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h index 9d1037ade727a..284b4f5b9935e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h @@ -1,4 +1,4 @@ -//===--- EasilySwappableParametersCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.cpp index be0a2a1baa12a..eebab847d1070 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.cpp @@ -1,4 +1,4 @@ -//===--- EmptyCatchCheck.cpp - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.h b/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.h index b0694384f5c2f..acef43934adba 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/EmptyCatchCheck.h @@ -1,4 +1,4 @@ -//===--- EmptyCatchCheck.h - clang-tidy -------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp index 8eb7881a47a26..3d839b5111cc8 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp @@ -1,4 +1,4 @@ -//===--- ExceptionEscapeCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h index 14b9e8cc0a77f..974b07c42407d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h @@ -1,4 +1,4 @@ -//===--- ExceptionEscapeCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.cpp index d70cd2836c80f..96e5d5d06ad70 100644 --- a/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.cpp @@ -1,4 +1,4 @@ -//===--- FoldInitTypeCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h b/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h index 435c440ddd29f..72dab600e3330 100644 --- a/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h @@ -1,4 +1,4 @@ -//===--- FoldInitTypeCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp index 070ed04efffc4..c3db8fa9b3af2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- ForwardDeclarationNamespaceCheck.cpp - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.h b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.h index 266346960a141..700e52f7bb86d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.h @@ -1,4 +1,4 @@ -//===--- ForwardDeclarationNamespaceCheck.h - clang-tidy --------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp index 10b747e17e2ad..d372cbd798b2e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp @@ -1,4 +1,4 @@ -//===--- ForwardingReferenceOverloadCheck.cpp - clang-tidy-----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.h b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.h index 75297dbbdf41d..ead0edb6a2b3a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.h @@ -1,4 +1,4 @@ -//===--- ForwardingReferenceOverloadCheck.h - clang-tidy---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.cpp index 46bf20e34ce04..2211a0ba24ebc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.cpp @@ -1,4 +1,4 @@ -//===--- ImplicitWideningOfMultiplicationResultCheck.cpp - clang-tidy -----===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.h b/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.h index 077a4b847cd9c..74c64eb43f3c9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.h @@ -1,4 +1,4 @@ -//===--- ImplicitWideningOfMultiplicationResultCheck.h ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.cpp index 92425890a0ea8..b0dd9017c8426 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.cpp @@ -1,4 +1,4 @@ -//===--- InaccurateEraseCheck.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.h b/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.h index 5bf29d04e4068..3485ffdd89257 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.h @@ -1,4 +1,4 @@ -//===--- InaccurateEraseCheck.h - clang-tidy---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.cpp index 73bffe93146e6..9ce6d42344cdf 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.cpp @@ -1,4 +1,4 @@ -//===--- IncDecInConditionsCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.h b/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.h index 1f2f1690041fd..2e2dcb1cde7bc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/IncDecInConditionsCheck.h @@ -1,4 +1,4 @@ -//===--- IncDecInConditionsCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.cpp index 07cd90d64c2a4..84a99c36523ac 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.cpp @@ -1,4 +1,4 @@ -//===--- IncorrectEnableIfCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.h b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.h index 37a52b425aa80..ea9cb4ecd0006 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableIfCheck.h @@ -1,4 +1,4 @@ -//===--- IncorrectEnableIfCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.cpp index 425e46cf6c88c..1b3c4fe847af0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.cpp @@ -1,4 +1,4 @@ -//===--- IncorrectEnableSharedFromThisCheck.cpp - clang-tidy --------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.h b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.h index 987c56059259b..866ae56631e36 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/IncorrectEnableSharedFromThisCheck.h @@ -1,4 +1,4 @@ -//===--- IncorrectEnableSharedFromThisCheck.h - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.cpp index c2b0732a3e7bd..a4965c298adbc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.cpp @@ -1,4 +1,4 @@ -//===--- IncorrectRoundingsCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.h b/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.h index 292cfbaa688f9..a671a4af95f12 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.h @@ -1,4 +1,4 @@ -//===--- IncorrectRoundingsCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp index cda9c4e7a6e58..1e516c1573219 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp @@ -1,4 +1,4 @@ -//===--- InfiniteLoopCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.h b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.h index 6d0c6d75f2262..0766e2fa3c35d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.h @@ -1,4 +1,4 @@ -//===--- InfiniteLoopCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.cpp index 63c932c94b6fb..a262f9b9fc4bc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.cpp @@ -1,4 +1,4 @@ -//===--- IntegerDivisionCheck.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h b/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h index b74b206f07373..b191cf693029e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h @@ -1,4 +1,4 @@ -//===--- IntegerDivisionCheck.h - clang-tidy---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp index 7d92ef301aec3..1e657888b0fc0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp @@ -1,4 +1,4 @@ -//===--- InvalidEnumDefaultInitializationCheck.cpp - clang-tidy -----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.h b/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.h index 0746c4d025d1f..b9b4f20d111fc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.h @@ -1,4 +1,4 @@ -//===--- InvalidEnumDefaultInitializationCheck.h - clang-tidy -*- C++ -*---===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.cpp index 8517d2bac0d59..fb73e896fdb13 100644 --- a/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.cpp @@ -1,4 +1,4 @@ -//===--- LambdaFunctionNameCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.h b/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.h index 04ba3596167e3..9e53951c4a7bd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.h @@ -1,4 +1,4 @@ -//===--- LambdaFunctionNameCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp index 7d89e107a62d2..b16119daaad8a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp @@ -1,4 +1,4 @@ -//===--- MacroParenthesesCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.h b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.h index 47c18d8d60bdd..34ea582153ebb 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.h @@ -1,4 +1,4 @@ -//===--- MacroParenthesesCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp index 879040177079a..78a53d12bd312 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp @@ -1,4 +1,4 @@ -//===--- MacroRepeatedSideEffectsCheck.cpp - clang-tidy--------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.h b/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.h index a15b8d4671e1b..25b33ba3082af 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.h @@ -1,4 +1,4 @@ -//===--- MacroRepeatedSideEffectsCheck.h - clang-tidy -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.cpp index 23de8d971898e..ff7f3020102ad 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisleadingSetterOfReferenceCheck.cpp - clang-tidy-----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.h b/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.h index 99e7a9435cfa9..b44f7a4ccb795 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MisleadingSetterOfReferenceCheck.h @@ -1,4 +1,4 @@ -//===--- MisleadingSetterOfReferenceCheck.h - clang-tidy---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.cpp index 5b166b4b3a9bf..5925e28eed734 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisplacedOperatorInStrlenInAllocCheck.cpp - clang-tidy------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h b/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h index 93cf50d0b1c6f..764fd3ff97fed 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h @@ -1,4 +1,4 @@ -//===--- MisplacedOperatorInStrlenInAllocCheck.h - clang-tidy----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp index 86785d36696dc..f5acafb7637ad 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisplacedPointerArithmeticInAllocCheck.cpp - clang-tidy-----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h index 83cd0ddf3cc49..a86d2a33d503f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h @@ -1,4 +1,4 @@ -//===--- MisplacedPointerArithmeticInAllocCheck.h - clang-tidy---*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp index 219972e0bdad7..d508e2aaba53c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisplacedWideningCastCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.h b/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.h index d03506838d07e..5fde5c7d0e46d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.h @@ -1,4 +1,4 @@ -//===--- MisplacedWideningCastCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.cpp index 5dc988d6662df..66559a0e5d7b5 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.cpp @@ -1,4 +1,4 @@ -//===--- MoveForwardingReferenceCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.h b/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.h index 2bbafe716124b..4fc876a232f37 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.h @@ -1,4 +1,4 @@ -//===--- MoveForwardingReferenceCheck.h - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp index 1a23473fdd229..2eff013b2ab7d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.cpp @@ -1,4 +1,4 @@ -//===--- MultiLevelImplicitPointerConversionCheck.cpp - clang-tidy --------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.h b/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.h index 5ec78be0ea79b..d0a9a21523862 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MultiLevelImplicitPointerConversionCheck.h @@ -1,4 +1,4 @@ -//===--- MultiLevelImplicitPointerConversionCheck.h - clang-tidy *- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.cpp index 6344b4bb6271e..17aea9392bd26 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.cpp @@ -1,4 +1,4 @@ -//===--- MultipleNewInOneExpressionCheck.cpp - clang-tidy------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.h b/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.h index 29eea12ff7192..53ad4a514bcc7 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MultipleNewInOneExpressionCheck.h @@ -1,4 +1,4 @@ -//===--- MultipleNewInOneExpressionCheck.h - clang-tidy----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp index de05cc0e4f7fb..390f3dd472a5b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp @@ -1,4 +1,4 @@ -//===--- MultipleStatementMacroCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h index 626cddce6734c..73a00fa493797 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h @@ -1,4 +1,4 @@ -//===--- MultipleStatementMacroCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp index 249c77ca0c432..287ee95a4db55 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp @@ -1,4 +1,4 @@ -//===--- NarrowingConversionsCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h index 116a8cba8d321..1f37086e3af55 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h @@ -1,4 +1,4 @@ -//===--- NarrowingConversionsCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.cpp index 8023e32d53278..6d21c521bbca7 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoEscapeCheck.cpp - clang-tidy -----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.h b/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.h index 476c7749d6e04..4760b171e75ce 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.h @@ -1,4 +1,4 @@ -//===--- NoEscapeCheck.h - clang-tidy ---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.cpp index e0b0df98d3409..067577f184281 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.cpp @@ -1,4 +1,4 @@ -//===--- NonZeroEnumToBoolConversionCheck.cpp - clang-tidy ----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.h b/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.h index f1cb81f05a723..977545fd5b65c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/NonZeroEnumToBoolConversionCheck.h @@ -1,4 +1,4 @@ -//===--- NonZeroNonZeroEnumToBoolConversionCheck.h - clang-tidy -*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp index 2ddcfa02bfb96..abde115d10a1b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp @@ -1,4 +1,4 @@ -//===----- NondeterministicPointerIterationOrderCheck.cpp - clang-tidy ----===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp index 203170d55f694..d4676842a97ff 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp @@ -1,4 +1,4 @@ -//===--- NotNullTerminatedResultCheck.cpp - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h index 1eeead02b17ab..fa2ca59b65300 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h @@ -1,4 +1,4 @@ -//===--- NotNullTerminatedResultCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp index cda9288c0531a..1b1e0401556e0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp @@ -1,4 +1,4 @@ -//===--- OptionalValueConversionCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.h b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.h index 888d29fc937bd..83e08e7359224 100644 --- a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.h @@ -1,4 +1,4 @@ -//===--- OptionalValueConversionCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp index 56576ba360399..3c0ced96c05ac 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp @@ -1,4 +1,4 @@ -//===--- ParentVirtualCallCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.h b/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.h index 293069fd24665..2f86d75a6d64d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.h @@ -1,4 +1,4 @@ -//===--- ParentVirtualCallCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.cpp index 6e6ad10fabbb3..c21abad947912 100644 --- a/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.cpp @@ -1,4 +1,4 @@ -//===--- PointerArithmeticOnPolymorphicObjectCheck.cpp - clang-tidy--------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.h b/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.h index 84f2d8e74ba87..13f9df656c98c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/PointerArithmeticOnPolymorphicObjectCheck.h @@ -1,4 +1,4 @@ -//===--- PointerArithmeticOnPolymorphicObjectCheck.h ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp index f05924b81c4c0..57196adf38fb6 100644 --- a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp @@ -1,4 +1,4 @@ -//===--- PosixReturnCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h index e9d2263b06bc9..d72c86c060fb9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h @@ -1,4 +1,4 @@ -//===--- PosixReturnCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.cpp index e717564847e4a..6abe53f47b8f9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantBranchConditionCheck.cpp - clang-tidy--------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.h b/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.h index 1dfab69db0a22..854de520807fc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantBranchConditionCheck.h - clang-tidy -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp index 5812c18a2ccca..62e22450800ea 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp @@ -1,4 +1,4 @@ -//===--- ReservedIdentifierCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h index 474dc25f6386c..5b6fbff266217 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h @@ -1,4 +1,4 @@ -//===--- ReservedIdentifierCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp index 295955a971d7e..a3265293bef58 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.cpp @@ -1,4 +1,4 @@ -//===--- ReturnConstRefFromParameterCheck.cpp - clang-tidy ----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.h b/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.h index 8768d07087383..8149bd29030b3 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ReturnConstRefFromParameterCheck.h @@ -1,4 +1,4 @@ -//===--- ReturnConstRefFromParameterCheck.h - clang-tidy --------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.cpp index 72036aaff158c..2997d7d3167e4 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.cpp @@ -1,4 +1,4 @@ -//===--- SharedPtrArrayMismatchCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.h b/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.h index 3787eb345b857..c163c6b7fbafb 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.h @@ -1,4 +1,4 @@ -//===--- SharedPtrArrayMismatchCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp index fa38f5e07f832..86af5cbd94374 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp @@ -1,4 +1,4 @@ -//===--- SignalHandlerCheck.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h index 3573cdf49e1d1..6589b19fbe048 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h @@ -1,4 +1,4 @@ -//===--- SignalHandlerCheck.h - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp index dfd3cbfcd664a..1041355a0caad 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp @@ -1,4 +1,4 @@ -//===--- SignedCharMisuseCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h index 42d6080736d30..c735ac634c801 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h @@ -1,4 +1,4 @@ -//===--- SignedCharMisuseCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.cpp index df2a3e26ea8dc..08ff82f57e3c2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.cpp @@ -1,4 +1,4 @@ -//===--- SizeofContainerCheck.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.h b/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.h index f50ce99c6d4c0..8fc351b8c6cb2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.h @@ -1,4 +1,4 @@ -//===--- SizeofContainerCheck.h - clang-tidy---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp index 8da6227e172cd..139213ed359ba 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp @@ -1,4 +1,4 @@ -//===--- SizeofExpressionCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h index e979b4723cf2e..6d7c33977db93 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h @@ -1,4 +1,4 @@ -//===--- SizeofExpressionCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.cpp index fbdb676be68b0..ee797ecb694bd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.cpp @@ -1,4 +1,4 @@ -//===--- SmartPtrArrayMismatchCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.h b/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.h index 7fcc4b6cfa0e6..b7703a7d61c03 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.h @@ -1,4 +1,4 @@ -//===--- SharedPtrArrayMismatchCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.cpp index 9bdd167a7afe9..1e8058bc4abc9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.cpp @@ -1,4 +1,4 @@ -//===--- SpuriouslyWakeUpFunctionsCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.h b/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.h index 098299aea7dee..23bf8056c0f61 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.h @@ -1,4 +1,4 @@ -//===--- SpuriouslyWakeUpFunctionsCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.cpp index 5d9e91e0b82c7..a7958cc229ffe 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.cpp @@ -1,4 +1,4 @@ -//===--- StandaloneEmptyCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.h b/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.h index 6eaf3e40cb782..85b25d8e25abc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/StandaloneEmptyCheck.h @@ -1,4 +1,4 @@ -//===--- StandaloneEmptyCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.cpp index d1902b658061b..e4f7a1778fd44 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.cpp @@ -1,4 +1,4 @@ -//===--- StringConstructorCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h b/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h index e90cf44b07680..5ab05e119abe9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h @@ -1,4 +1,4 @@ -//===--- StringConstructorCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp index 4f93b3ef779f5..93a55ef549896 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp @@ -1,4 +1,4 @@ -//===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.h b/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.h index 1e86fdfaa3a89..68783b7da53c6 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.h @@ -1,4 +1,4 @@ -//===--- StringIntegerAssignmentCheck.h - clang-tidy-------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.cpp index 444f3081b704d..b3e0673ea6103 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.cpp @@ -1,4 +1,4 @@ -//===--- StringLiteralWithEmbeddedNulCheck.cpp - clang-tidy----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.h b/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.h index 85d172d835d21..59aece123057a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.h @@ -1,4 +1,4 @@ -//===--- StringLiteralWithEmbeddedNulCheck.h - clang-tidy--------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp index 20789b3123e2f..faa07fff5a369 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp @@ -1,4 +1,4 @@ -//===--- StringviewNullptrCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.h b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.h index 20757cbbaaf7a..81a10101049c9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.h @@ -1,4 +1,4 @@ -//===--- StringviewNullptrCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.cpp index f2067bec001cc..8dbe1c0153f35 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousEnumUsageCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.h index c8a70c5f07043..542bf7577f927 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousEnumUsageCheck.h - clang-tidy------------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp index 09ba79f055752..843368e723f1f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousIncludeCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h index 1167b5a4593f7..03f569e5a483e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousIncludeCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp index 84957e0b8190c..d1df2a8634035 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousMemoryComparisonCheck.cpp - clang-tidy -----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.h index 5625739ef1327..c36d256242e19 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousMemoryComparisonCheck.h - clang-tidy ---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.cpp index cc1bd622039bc..b1d12ba306814 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousMemsetUsageCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h index db2c09a86ddbd..41ef525c7f9dd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousMemsetUsageCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.cpp index 5b1b28dbfbadd..a41f65083653a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.h index 215344b6bfe53..3a26b0a4a317e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousMissingCommaCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.cpp index 221cc832882f2..b5da8016f2cc8 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousReallocUsageCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.h index 2dcbd348697b4..2517d5f7ae319 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousReallocUsageCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousReallocUsageCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp index e93ba760f447e..543d31285af8c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousSemicolonCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.h index c97bfad665595..73131c7f9f12a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousSemicolonCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp index 33cf04dd56593..7519685418c8c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousStringCompareCheck.cpp - clang-tidy---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.h index c399c26786d89..6f01b1ad087bd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousStringCompareCheck.h - clang-tidy-------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp index 8f4b0c5e0dced..d239cbe1fd2cf 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousStringviewDataUsageCheck.cpp - clang-tidy --------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h index 31eca0a48722f..57cb164af8565 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringviewDataUsageCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousStringviewDataUsageCheck.h - clang-tidy -------//C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.cpp index 7fdb67e9a7cd9..bcedff5ef5aa2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.cpp @@ -1,4 +1,4 @@ -//===--- SwappedArgumentsCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.h b/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.h index dcf57eca956f5..e9e779c0cb3d9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.h @@ -1,4 +1,4 @@ -//===--- SwappedArgumentsCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.cpp index b8fc62e8c3292..d821c40f2760a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.cpp @@ -1,4 +1,4 @@ -//===--- SwitchMissingDefaultCaseCheck.cpp - clang-tidy -------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.h b/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.h index b0d6e2062b997..f5237775650ea 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SwitchMissingDefaultCaseCheck.h @@ -1,4 +1,4 @@ -//===--- SwitchMissingDefaultCaseCheck.h - clang-tidy -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.cpp index 02f4421efdbf4..a85a136b92e87 100644 --- a/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.cpp @@ -1,4 +1,4 @@ -//===--- TaggedUnionMemberCountCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.h b/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.h index 8b9d677d00b40..0c337df405061 100644 --- a/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/TaggedUnionMemberCountCheck.h @@ -1,4 +1,4 @@ -//===--- TaggedUnionMemberCountCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.cpp index d31211b571734..c8ce77ed6a4ab 100644 --- a/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.cpp @@ -1,4 +1,4 @@ -//===--- TerminatingContinueCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.h b/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.h index 0593c7433c94e..79a794de3819a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.h @@ -1,4 +1,4 @@ -//===--- TerminatingContinueCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.cpp index 17d2e75e4f666..89eafb15f2652 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.cpp @@ -1,4 +1,4 @@ -//===--- ThrowKeywordMissingCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.h b/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.h index 018dceb010c26..ee1e7d20d39e0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.h @@ -1,4 +1,4 @@ -//===--- ThrowKeywordMissingCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.cpp index 4ceeefb78ee82..536b6806c66e6 100644 --- a/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.cpp @@ -1,4 +1,4 @@ -//===--- TooSmallLoopVariableCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.h b/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.h index 03065e3a706a7..e2c1bb7b002e3 100644 --- a/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.h @@ -1,4 +1,4 @@ -//===--- TooSmallLoopVariableCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp index 0b51d5677929c..e8f204128cae3 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp @@ -1,4 +1,4 @@ -//===--- UncheckedOptionalAccessCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.h b/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.h index e2fcccbfefb26..3c0f261126823 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.h @@ -1,4 +1,4 @@ -//===--- UncheckedOptionalAccessCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.cpp index 4f6bc18151789..c5a0b3d6d963b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.cpp @@ -1,4 +1,4 @@ -//===--- UndefinedMemoryManipulationCheck.cpp - clang-tidy-----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h b/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h index 5e2d7d8ce48ec..fd067c48a16e0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h @@ -1,4 +1,4 @@ -//===--- UndefinedMemoryManipulationCheck.h - clang-tidy---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.cpp index a4c1fd53dfbe2..c358a8e0378bd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.cpp @@ -1,4 +1,4 @@ -//===--- UndelegatedConstructorCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h b/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h index 03cf5606ef529..18465f7353b1d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h @@ -1,4 +1,4 @@ -//===--- UndelegatedConstructorCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.cpp index 5e220017c97f4..bf30753f0e5ef 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnhandledExceptionAtNewCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.h index 6e2c6aa373de7..0724b4ac6d3e9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.h @@ -1,4 +1,4 @@ -//===--- UnhandledExceptionAtNewCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp index c4c4267545b59..b696089c006c7 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnhandledSelfAssignmentCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h index f666e6bfad2e6..61d33028aadc8 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h @@ -1,4 +1,4 @@ -//===--- UnhandledSelfAssignmentCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.cpp index 57e1f744fcd7d..bce46572bdeb9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnintendedCharOstreamOutputCheck.cpp - clang-tidy ----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.h index 0759e3d1eb460..af53dc6158696 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnintendedCharOstreamOutputCheck.h @@ -1,4 +1,4 @@ -//===--- UnintendedCharOstreamOutputCheck.h - clang-tidy --------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.cpp index 8d09b4b320c2c..34c2c6dd4642d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.cpp @@ -1,4 +1,4 @@ -//===--- UniquePtrArrayMismatchCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.h b/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.h index fb7531ab146c6..36be247c409cb 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UniquePtrArrayMismatchCheck.h @@ -1,4 +1,4 @@ -//===--- UniquePtrArrayMismatchCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp index 0f2c18ae02663..0399af2a673f4 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnsafeFunctionsCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h index 9b2ec990be01f..6495bd34f6c58 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h @@ -1,4 +1,4 @@ -//===--- UnsafeFunctionsCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.cpp index 3b6969a57c2b8..2b7db2548cfb2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnusedLocalNonTrivialVariableCheck.cpp - clang-tidy --------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.h index e79b803a2158b..92eaf290f2073 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnusedLocalNonTrivialVariableCheck.h @@ -1,4 +1,4 @@ -//===--- UnusedLocalNonTrivialVariableCheck.h - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp index b17d3868dd76a..dae679baf14e5 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnusedRaiiCheck.cpp - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.h index 219fa07fe8265..376f664f74548 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.h @@ -1,4 +1,4 @@ -//===--- UnusedRaiiCheck.h - clang-tidy -------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.cpp index 6f08c41b41887..c2fc4af86391d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnusedReturnValueCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.h index d65a567e1c468..f81603cadbe80 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.h @@ -1,4 +1,4 @@ -//===--- UnusedReturnValueCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp index f9906ebf6ea26..efb5ec64689cf 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseAfterMoveCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.h b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.h index c14e802847415..ac85c80ee0b5b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.h @@ -1,4 +1,4 @@ -//===--- UseAfterMoveCheck.h - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.cpp index 509fce3a38471..0c8d2b8ef40f9 100644 --- a/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.cpp @@ -1,4 +1,4 @@ -//===--- VirtualNearMissCheck.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h b/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h index 0d6b6db7f9a7f..b852dffa7c6ea 100644 --- a/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h @@ -1,4 +1,4 @@ -//===--- VirtualNearMissCheck.h - clang-tidy---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp index a0d0ac1007c3e..c9c150dc230b5 100644 --- a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- CERTTidyModule.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.cpp b/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.cpp index d377c24da1f46..d87396f5189b1 100644 --- a/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.cpp @@ -1,4 +1,4 @@ -//===-- CommandProcessorCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.h b/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.h index 1f9206cae73d7..94234f284c045 100644 --- a/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.h +++ b/clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.h @@ -1,4 +1,4 @@ -//===--- CommandInterpreterCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.cpp b/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.cpp index 2c2248afb69e7..45c170ec20f4e 100644 --- a/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.cpp @@ -1,4 +1,4 @@ -//===--- DefaultOperatorNewCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.h b/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.h index d38a9edb0f95d..f8cb4d6e32d69 100644 --- a/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.h +++ b/clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.h @@ -1,4 +1,4 @@ -//===--- DefaultOperatorNewCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.cpp index e86ed6319a695..79fbc66b5f8a3 100644 --- a/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- DontModifyStdNamespaceCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.h b/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.h index d35affa0adc9c..cfcd878644ddb 100644 --- a/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.h @@ -1,4 +1,4 @@ -//===--- DontModifyStdNamespaceCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.cpp b/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.cpp index 46acc9f4716c5..01299e0e5ab48 100644 --- a/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.cpp +++ b/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.cpp @@ -1,4 +1,4 @@ -//===--- FloatLoopCounter.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.h b/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.h index 7bc4422aef29b..e9207385f0d20 100644 --- a/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.h +++ b/clang-tools-extra/clang-tidy/cert/FloatLoopCounter.h @@ -1,4 +1,4 @@ -//===--- FloatLoopCounter.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp index f1d2bcaa916d5..4fe9c6c22590b 100644 --- a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp @@ -1,4 +1,4 @@ -//===--- LimitedRandomnessCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h index 051e100e2cec8..b024b9008d876 100644 --- a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h +++ b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h @@ -1,4 +1,4 @@ -//===--- LimitedRandomnessCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp b/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp index a97fd720df86a..fb9d72ce6bd31 100644 --- a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp @@ -1,4 +1,4 @@ -//===--- MutatingCopyCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h b/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h index 8cb7b8e5fa2ce..ecb3d164b5272 100644 --- a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h +++ b/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h @@ -1,4 +1,4 @@ -//===--- MutatingCopyCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.cpp b/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.cpp index afeef2a80ba54..e266cf995e8a7 100644 --- a/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.cpp @@ -1,4 +1,4 @@ -//===--- NonTrivialTypesLibcMemoryCallsCheck.cpp - clang-tidy ----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.h b/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.h index aecb37dd7c739..221bdca0baae7 100644 --- a/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.h +++ b/clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.h @@ -1,4 +1,4 @@ -//===--- NonTrivialTypesLibcMemoryCallsCheck.h - clang-tidy -----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp index cf7b36e05dc4f..aa95fadb0290b 100644 --- a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProperlySeededRandomGeneratorCheck.cpp - clang-tidy---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h index 9f96c6124c6f0..d34b8e702f670 100644 --- a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h +++ b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h @@ -1,4 +1,4 @@ -//===--- ProperlySeededRandomGeneratorCheck.h - clang-tidy-------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.cpp b/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.cpp index e7d9342bf748f..4f282b2c6b344 100644 --- a/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.cpp @@ -1,4 +1,4 @@ -//===--- SetLongJmpCheck.cpp - clang-tidy----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.h b/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.h index 6e5d8385d9428..ced3d8cd1b316 100644 --- a/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.h +++ b/clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.h @@ -1,4 +1,4 @@ -//===--- SetLongJmpCheck.h - clang-tidy--------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.cpp b/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.cpp index 12830a64bf23e..8f31851a63edc 100644 --- a/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.cpp @@ -1,4 +1,4 @@ -//===--- StaticObjectExceptionCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.h b/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.h index 26ae6b478b44d..6de9929fb5cc7 100644 --- a/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.h +++ b/clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.h @@ -1,4 +1,4 @@ -//===--- StaticObjectExceptionCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp b/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp index cbc2991ca6399..2225a90aeece1 100644 --- a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp @@ -1,4 +1,4 @@ -//===--- ThrownExceptionTypeCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h b/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h index 47469a1e82c7c..9b97feb7fe5f5 100644 --- a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h +++ b/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h @@ -1,4 +1,4 @@ -//===--- ThrownExceptionTypeCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.cpp b/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.cpp index 5fba32417db42..c330d4691443f 100644 --- a/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.cpp @@ -1,4 +1,4 @@ -//===-- VariadicFunctionDefCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.h b/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.h index a082e370c3228..a7f5f11974aa3 100644 --- a/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.h +++ b/clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.h @@ -1,4 +1,4 @@ -//===--- VariadicFunctionDefCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/concurrency/ConcurrencyTidyModule.cpp b/clang-tools-extra/clang-tidy/concurrency/ConcurrencyTidyModule.cpp index 6c58c506dc903..135a54d4565cb 100644 --- a/clang-tools-extra/clang-tidy/concurrency/ConcurrencyTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/concurrency/ConcurrencyTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- ConcurrencyTidyModule.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.cpp b/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.cpp index 7e80471b12302..f8050bcfe3263 100644 --- a/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.cpp +++ b/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.cpp @@ -1,4 +1,4 @@ -//===--- MtUnsafeCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.h b/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.h index ccee5b3a748a3..c5c707778bc32 100644 --- a/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.h +++ b/clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.h @@ -1,4 +1,4 @@ -//===--- MtUnsafeCheck.h - clang-tidy ---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.cpp b/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.cpp index 130b56fb6cd04..9e9c908565497 100644 --- a/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.cpp +++ b/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.cpp @@ -1,4 +1,4 @@ -//===--- ThreadCanceltypeAsynchronousCheck.cpp - clang-tidy ---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.h b/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.h index dbfcb265640c9..2d5d82dfd9285 100644 --- a/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.h +++ b/clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.h @@ -1,4 +1,4 @@ -//===--- ThreadCanceltypeAsynchronousCheck.h - clang-tidy -------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.cpp index 3c99831f9d640..15fb53c5c57b7 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidCapturingLambdaCoroutinesCheck.cpp - clang-tidy -------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.h index b32e2662b5fba..de59ff189c595 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidCapturingLambdaCoroutinesCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidCapturingLambdaCoroutinesCheck.h - clang-tidy -----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp index dd913c92d60a0..78bcc3e7e0ecc 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidConstOrRefDataMembersCheck.cpp - clang-tidy -----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.h index de55e0049eaf7..9d458fe9a4d00 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidConstOrRefDataMembersCheck.h - clang-tidy ---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.cpp index d623e05e15cc0..5ecfd38e80918 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidDoWhileCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.h index 45259c23e26de..0756d0860f961 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidDoWhileCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidDoWhileCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.cpp index b14587ad7db83..4fb0029cc4323 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidGotoCheck.cpp - clang-tidy-----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.h index 8eae409462c91..2b13df795d87c 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidGotoCheck.h - clang-tidy---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.cpp index a97ec9fe3fe3d..f0e66e44690b2 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidNonConstGlobalVariablesCheck.cpp - clang-tidy ---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.h index a912763489db9..9c40fa3e9d341 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidNonConstGlobalVariablesCheck.h - clang-tidy -------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.cpp index 3eca364d16c35..7ef1e2bc6178d 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidReferenceCoroutineParametersCheck.cpp - clang-tidy ----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.h index 0a4d5b33f2396..3469ea7a8efee 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidReferenceCoroutineParametersCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidReferenceCoroutineParametersCheck.h - clang-tidy --*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp index cc1ae156eef3e..5f4c9b48e346a 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp @@ -1,4 +1,4 @@ -//===-- CppCoreGuidelinesTidyModule.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp index 3eef2fd12cc8e..ed595e1148dec 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp @@ -1,4 +1,4 @@ -//===--- InitVariablesCheck.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.h index 901500ac4b915..a1476494b4046 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.h @@ -1,4 +1,4 @@ -//===--- InitVariablesCheck.h - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.cpp index e9f0bd98cad16..788d0571ac7ff 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- InterfacesGlobalInitCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h index 4b04ec112486b..2141fc2423bdf 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h @@ -1,4 +1,4 @@ -//===--- InterfacesGlobalInitCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.cpp index 11eb056e916d3..766cae45f15b5 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.cpp @@ -1,4 +1,4 @@ -//===--- MacroUsageCheck.cpp - clang-tidy----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.h index 876a18256080e..dd553ba613f1e 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.h @@ -1,4 +1,4 @@ -//===--- MacroUsageCheck.h - clang-tidy--------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.cpp index 5dee7f91a9341..57d98ee1fd8b4 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisleadingCaptureDefaultByValueCheck.cpp - clang-tidy-------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.h index dcf2ce9afc740..87187b3b70bcb 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MisleadingCaptureDefaultByValueCheck.h @@ -1,4 +1,4 @@ -//===--- MisleadingCaptureDefaultByValueCheck.h - clang-tidy---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp index 75da6de9b5f13..090ab2f0474c4 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp @@ -1,4 +1,4 @@ -//===--- MissingStdForwardCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.h index f833b8031f8af..247291076d939 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.h @@ -1,4 +1,4 @@ -//===--- MissingStdForwardCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.cpp index b81c6230b8941..22cd1e4e29a68 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoMallocCheck.cpp - clang-tidy------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h index 8dea1465c3de7..4e664197b5f72 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h @@ -1,4 +1,4 @@ -//===--- NoMallocCheck.h - clang-tidy----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.cpp index 29470b1f725fb..43df277927d8b 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoSuspendWithLockCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.h index c7b7f476003fb..877a5173e7f10 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoSuspendWithLockCheck.h @@ -1,4 +1,4 @@ -//===--- NoSuspendWithLockCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp index 6cdd5bcac6370..f4e89470a80da 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp @@ -1,4 +1,4 @@ -//===--- OwningMemoryCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h index 3ab8f34b580f9..e191f09943710 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h @@ -1,4 +1,4 @@ -//===--- OwningMemoryCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp index 79cd4bbcc9a60..9913671c6f74e 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp @@ -1,4 +1,4 @@ -//===--- PreferMemberInitializerCheck.cpp - clang-tidy -------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h index b3f8284b435af..6275aa61ba03d 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h @@ -1,4 +1,4 @@ -//===--- PreferMemberInitializerCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.cpp index 6f67ab955baa3..f3237f4d7dae0 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProBoundsArrayToPointerDecayCheck.cpp - clang-tidy----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h index bcbe40b5dd14c..abd4e5a77009d 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h @@ -1,4 +1,4 @@ -//===--- ProBoundsArrayToPointerDecayCheck.h - clang-tidy--------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.cpp index 35f432efa88ca..dd7b2b553b7a1 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.cpp @@ -1,4 +1,4 @@ -//===--- ProBoundsAvoidUncheckedContainerAccess.cpp - clang-tidy ----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h index cfd52d69c0f58..2a89be4724037 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h @@ -1,4 +1,4 @@ -//===--- ProBoundsAvoidUncheckedContainerAccess.h - clang-tidy --*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp index 20f9a2e549fe2..634ec186616d5 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProBoundsConstantArrayIndexCheck.cpp - clang-tidy-----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h index a583cc78b2c54..19d4ef8e25121 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h @@ -1,4 +1,4 @@ -//===--- ProBoundsConstantArrayIndexCheck.h - clang-tidy---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.cpp index 51995c5f64ef6..b1cf7152aacd4 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProBoundsPointerArithmeticCheck.cpp - clang-tidy------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h index 785f754055fb8..2bd113b38c4d4 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h @@ -1,4 +1,4 @@ -//===--- ProBoundsPointerArithmeticCheck.h - clang-tidy----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp index b234c2a041d8c..0d038bfca60d5 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProTypeConstCastCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h index 8d93633a321b5..e05adc966a496 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h @@ -1,4 +1,4 @@ -//===--- ProTypeConstCastCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.cpp index 5e255dcaacd26..b9867c2393f0b 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProTypeCstyleCastCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h index b7e3525e397b2..e6819c40a2bfc 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h @@ -1,4 +1,4 @@ -//===--- ProTypeCstyleCastCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp index a79c5281d6054..5de4e33a1e16d 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProTypeMemberInitCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h index cfe7c8735a0e0..58125303fb59b 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h @@ -1,4 +1,4 @@ -//===--- ProTypeMemberInitCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.cpp index 94cea79888555..1cd4bf7435be4 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProTypeReinterpretCastCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h index da001bfb85d78..63b04261ea436 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h @@ -1,4 +1,4 @@ -//===--- ProTypeReinterpretCast.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.cpp index 14616ee8514f7..c200a79cb8c49 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProTypeStaticCastDowncastCheck.cpp - clang-tidy-------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h index b9e78a82a39f2..266441fd9144f 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h @@ -1,4 +1,4 @@ -//===--- ProTypeStaticCastDowncastCheck.h - clang-tidy-----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.cpp index 2793dfbc0eb3f..4361177db4251 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProTypeUnionAccessCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h index e90d154f9630f..5127e652b6466 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h @@ -1,4 +1,4 @@ -//===--- ProTypeUnionAccessCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.cpp index 3923df312791d..431b2a76feeea 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.cpp @@ -1,4 +1,4 @@ -//===--- ProTypeVarargCheck.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h index acb3d274908da..f3b20e6e793e5 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h @@ -1,4 +1,4 @@ -//===--- ProTypeVarargCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp index 272152644d7dd..c40ac7ab5102b 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.cpp @@ -1,4 +1,4 @@ -//===--- RvalueReferenceParamNotMovedCheck.cpp - clang-tidy ---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.h index 950c0206745d7..739e1d706acc3 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/RvalueReferenceParamNotMovedCheck.h @@ -1,4 +1,4 @@ -//===--- RvalueReferenceParamNotMovedCheck.h - clang-tidy -------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp index 6508bfd5ca808..fe95dbba68118 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp @@ -1,4 +1,4 @@ -//===--- SlicingCheck.cpp - clang-tidy-------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h index 317547f0a9c87..6d89a8a622a61 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h @@ -1,4 +1,4 @@ -//===--- SlicingCheck.h - clang-tidy-----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.cpp index 0b6b8d9c97135..b38a0c66eb582 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.cpp @@ -1,4 +1,4 @@ -//===--- SpecialMemberFunctionsCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h index c18ed7db055ba..ffd072a7f6a98 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h @@ -1,4 +1,4 @@ -//===--- SpecialMemberFunctionsCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp index ec7d9237afa3c..9e809e0bedb49 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseEnumClassCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h index dfa4b7e3fda62..c699f9116a120 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h @@ -1,4 +1,4 @@ -//===--- UseEnumClassCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp index e31d046565677..770088991419b 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp @@ -1,4 +1,4 @@ -//===--- VirtualClassDestructorCheck.cpp - clang-tidy -----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.h index 11bd598f81ac3..2c9d92ddeb4a7 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.h @@ -1,4 +1,4 @@ -//===--- VirtualClassDestructorCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.cpp b/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.cpp index 2d3a7e50f242e..875a851586578 100644 --- a/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.cpp +++ b/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidSpinlockCheck.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.h b/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.h index 5b5285710c3b0..78cc968ba4efd 100644 --- a/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.h +++ b/clang-tools-extra/clang-tidy/darwin/AvoidSpinlockCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidSpinlockCheck.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/darwin/DarwinTidyModule.cpp b/clang-tools-extra/clang-tidy/darwin/DarwinTidyModule.cpp index bc8c91a9ed413..0330626a7cd58 100644 --- a/clang-tools-extra/clang-tidy/darwin/DarwinTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/darwin/DarwinTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- MiscTidyModule.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.cpp b/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.cpp index 75d38a9724950..194f4217f73e3 100644 --- a/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.cpp +++ b/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.cpp @@ -1,4 +1,4 @@ -//===--- DispatchOnceNonstaticCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.h b/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.h index ddf6dfa22cf0a..484b4f93e75f5 100644 --- a/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.h +++ b/clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.h @@ -1,4 +1,4 @@ -//===--- DispatchOnceNonstaticCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.cpp index 96cd30e0badac..88766d3e5e972 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.cpp @@ -1,4 +1,4 @@ -//===--- DefaultArgumentsCallsCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h index 120dc90b2cbc0..9ba311c04e679 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h +++ b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h @@ -1,4 +1,4 @@ -//===--- DefaultArgumentsCallsCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.cpp index 05a663bf3d239..d80511eb626f5 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.cpp @@ -1,4 +1,4 @@ -//===--- DefaultArgumentsDeclarationsCheck.cpp - clang-tidy ---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h index da73fa4064cbd..b5a19c3b7c22e 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h +++ b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h @@ -1,4 +1,4 @@ -//===--- DefaultArgumentsDeclarationsCheck.h - clang-tidy -------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/FuchsiaTidyModule.cpp b/clang-tools-extra/clang-tidy/fuchsia/FuchsiaTidyModule.cpp index d7a70b39bdc55..f280a1b07bf39 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/FuchsiaTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/FuchsiaTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- FuchsiaTidyModule.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp index 4382f9df5336e..80de0282ee595 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp @@ -1,4 +1,4 @@ -//===--- MultipleInheritanceCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h index be5942c9520ae..838987d20014f 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h +++ b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h @@ -1,4 +1,4 @@ -//===--- MultipleInheritanceCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.cpp index 85864006e4d7b..e202c288d6986 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.cpp @@ -1,4 +1,4 @@ -//===--- OverloadedOperatorCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h b/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h index d26349d6e9afc..b974c6d7a4473 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h +++ b/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h @@ -1,4 +1,4 @@ -//===--- OverloadedOperatorCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.cpp index ac55d01208b63..9e540e03d365b 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.cpp @@ -1,4 +1,4 @@ -//===--- StaticallyConstructedObjectsCheck.cpp - clang-tidy----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h b/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h index 6c65c8cfeb22f..6b4ef681ee188 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h +++ b/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h @@ -1,4 +1,4 @@ -//===--- StaticallyConstructedObjectsCheck.h - clang-tidy--------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.cpp index b619812837753..b2c1acf358240 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.cpp @@ -1,4 +1,4 @@ -//===--- TrailingReturnCheck.cpp - clang-tidy------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h b/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h index 70551844898f1..db6bc33ca0e06 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h +++ b/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h @@ -1,4 +1,4 @@ -//===--- TrailingReturnCheck.h - clang-tidy----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.cpp index 20bd036fb265f..b6fb22c66d374 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.cpp +++ b/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.cpp @@ -1,4 +1,4 @@ -//===--- VirtualInheritanceCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h b/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h index 1bdf19f9146fb..8a3182dd57df7 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h +++ b/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h @@ -1,4 +1,4 @@ -//===--- VirtualInheritanceCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.cpp index 14e11eb0bc697..174ecb0ed7b77 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidCStyleCastsCheck.cpp - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.h b/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.h index 4267b896b6992..dbd2034418762 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.h +++ b/clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidCStyleCastsCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp index adcbf245ef7a3..daf49481bf3b0 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidNSObjectNewCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h index 37b9440396948..fda0d5906a5cd 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h +++ b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidNSObjectNewCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.cpp index 6322f63233590..73476571c252f 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidThrowingObjCExceptionCheck.cpp - clang-tidy------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h b/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h index 58b46e0a075a4..d32c02b9cfb4b 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h +++ b/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidThrowingObjCExceptionCheck.h - clang-tidy----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp index f2507f0b60e71..b335463bc78bd 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidUnderscoreInGoogletestNameCheck.cpp - clang-tidy --*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.h b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.h index b53e6c45913d5..c2e39d3a7026d 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.h +++ b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidUnderscoreInGoogletestNameCheck.h - clang-tidy ----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.cpp b/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.cpp index 0b14a51c12a8c..9831efe5384a2 100644 --- a/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.cpp @@ -1,4 +1,4 @@ -//===--- DefaultArgumentsCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h b/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h index 49d95a5acd35c..c0e539598e00e 100644 --- a/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h +++ b/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h @@ -1,4 +1,4 @@ -//===--- DefaultArgumentsCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp index 68233ec6bd441..a038af4fa9543 100644 --- a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp @@ -1,4 +1,4 @@ -//===--- ExplicitConstructorCheck.cpp - clang-tidy ------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.h b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.h index e4434ac71d786..4ed3671fd3951 100644 --- a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.h +++ b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.h @@ -1,4 +1,4 @@ -//===--- ExplicitConstructorCheck.h - clang-tidy ----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.cpp b/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.cpp index d911b58cb8b7e..ac56f5d920e21 100644 --- a/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.cpp @@ -1,4 +1,4 @@ -//===--- ExplicitMakePairCheck.cpp - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.h b/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.h index 2c796a2811314..49d5172f932d2 100644 --- a/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.h +++ b/clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.h @@ -1,4 +1,4 @@ -//===--- ExplicitMakePairCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.cpp b/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.cpp index 6631030734d25..3d75f4dd25bd1 100644 --- a/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.cpp @@ -1,4 +1,4 @@ -//===--- FunctionNamingCheck.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h b/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h index 560bb52f15a00..1f4fe92d542a8 100644 --- a/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h +++ b/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h @@ -1,4 +1,4 @@ -//===--- FunctionNamingCheck.h - clang-tidy ---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.cpp b/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.cpp index 459dee1247525..aa8bc74e911b4 100644 --- a/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.cpp @@ -1,4 +1,4 @@ -//===--- GlobalNamesInHeadersCheck.cpp - clang-tidy --------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.h b/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.h index 70a0a4c0cda00..4cc36630d3851 100644 --- a/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.h +++ b/clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.h @@ -1,4 +1,4 @@ -//===--- GlobalNamesInHeadersCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp index 9082c9368d87d..c0c3ffaee796f 100644 --- a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp @@ -1,4 +1,4 @@ -//===--- GlobalVariableDeclarationCheck.cpp - clang-tidy-------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h index 19e6c5dbc8e22..c6c32c3ff0884 100644 --- a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h +++ b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h @@ -1,4 +1,4 @@ -//===--- GlobalVariableDeclarationCheck.h - clang-tidy-----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp index eb5666be62bcf..aff8b45ff2f74 100644 --- a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- GoogleTidyModule.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp b/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp index 711444fa9bcdc..047c7f99ae299 100644 --- a/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp @@ -1,4 +1,4 @@ -//===--- IntegerTypesCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.h b/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.h index c62bda67ae2d9..be4989851f20a 100644 --- a/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.h +++ b/clang-tools-extra/clang-tidy/google/IntegerTypesCheck.h @@ -1,4 +1,4 @@ -//===--- IntegerTypesCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.cpp b/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.cpp index 6ec907ab1b696..63ca86266e27e 100644 --- a/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.cpp @@ -1,4 +1,4 @@ -//===--- OverloadedUnaryAndCheck.cpp - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.h b/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.h index 3c3b668754ac1..126f0fbc61b87 100644 --- a/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.h +++ b/clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.h @@ -1,4 +1,4 @@ -//===--- OverloadedUnaryAndCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/TodoCommentCheck.cpp b/clang-tools-extra/clang-tidy/google/TodoCommentCheck.cpp index adad54aa24ba9..8554870287c81 100644 --- a/clang-tools-extra/clang-tidy/google/TodoCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/TodoCommentCheck.cpp @@ -1,4 +1,4 @@ -//===--- TodoCommentCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/TodoCommentCheck.h b/clang-tools-extra/clang-tidy/google/TodoCommentCheck.h index de540d810afaa..d56036095bab9 100644 --- a/clang-tools-extra/clang-tidy/google/TodoCommentCheck.h +++ b/clang-tools-extra/clang-tidy/google/TodoCommentCheck.h @@ -1,4 +1,4 @@ -//===--- TodoCommentCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.cpp b/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.cpp index c1e421308d77d..3066dd0ff4595 100644 --- a/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnnamedNamespaceInHeaderCheck.cpp - clang-tidy ---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.h b/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.h index 55b735c0d141b..84f8ae56f2635 100644 --- a/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.h +++ b/clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.h @@ -1,4 +1,4 @@ -//===--- UnnamedNamespaceInHeaderCheck.h - clang-tidy -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp b/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp index c9b48e922ea57..9da1915affd91 100644 --- a/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp @@ -1,4 +1,4 @@ -//===--- UpgradeGoogletestCaseCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.h b/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.h index 61b09b9a9f7de..43fff32d86215 100644 --- a/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.h +++ b/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.h @@ -1,4 +1,4 @@ -//===--- UpgradeGoogletestCaseCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.cpp b/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.cpp index 26d27c19f489e..fbfd5d3430519 100644 --- a/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.cpp @@ -1,4 +1,4 @@ -//===--- UsingNamespaceDirectiveCheck.cpp - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.h b/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.h index b7abac1311045..bcdf6b6a4bcf1 100644 --- a/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.h +++ b/clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.h @@ -1,4 +1,4 @@ -//===--- UsingNamespaceDirectiveCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.cpp index ed39568ea554a..71b82875c09a0 100644 --- a/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.cpp @@ -1,4 +1,4 @@ -//===--- ExceptionBaseclassCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h b/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h index 79d8cf925d1b7..bc21249663af8 100644 --- a/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h +++ b/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h @@ -1,4 +1,4 @@ -//===--- ExceptionBaseclassCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp b/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp index 65a56be3e5a05..9695eab51062b 100644 --- a/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp @@ -1,4 +1,4 @@ -//===------- HICPPTidyModule.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp index b1a18485ce168..5321fd8d5b1c2 100644 --- a/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp @@ -1,4 +1,4 @@ -//===--- IgnoredRemoveResultCheck.cpp - clang-tidy ------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.h b/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.h index 39c45fea9aae4..8cf58d5a6978a 100644 --- a/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.h +++ b/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.h @@ -1,4 +1,4 @@ -//===--- IgnoredRemoveResultCheck.h - clang-tidy ----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.cpp index 3f5cd4b473903..e610d99007d4e 100644 --- a/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.cpp @@ -1,4 +1,4 @@ -//===--- MultiwayPathsCoveredCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h b/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h index c26fb3e72211d..2507f6cde338e 100644 --- a/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h +++ b/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h @@ -1,4 +1,4 @@ -//===--- MultiwayPathsCoveredCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp index 54ba6aebab1ba..a89a896b32981 100644 --- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoAssemblerCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h index 7ade7a33091f1..cf397df1578a4 100644 --- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h +++ b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h @@ -1,4 +1,4 @@ -//===--- NoAssemblerCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.cpp index bf09a6662d955..19c716e941271 100644 --- a/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.cpp @@ -1,4 +1,4 @@ -//===--- SignedBitwiseCheck.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h b/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h index 170c249bc65e6..b3538e7e51f58 100644 --- a/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h +++ b/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h @@ -1,4 +1,4 @@ -//===--- SignedBitwiseCheck.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/linuxkernel/LinuxKernelTidyModule.cpp b/clang-tools-extra/clang-tidy/linuxkernel/LinuxKernelTidyModule.cpp index b8b75b7ccaefe..645d07426fee2 100644 --- a/clang-tools-extra/clang-tidy/linuxkernel/LinuxKernelTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/linuxkernel/LinuxKernelTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- LinuxKernelTidyModule.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.cpp b/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.cpp index ce501ac2acca2..14f54571885f2 100644 --- a/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.cpp +++ b/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.cpp @@ -1,4 +1,4 @@ -//===--- MustCheckErrsCheck.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.h b/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.h index 7406aaead836e..a450f50c30cb8 100644 --- a/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.h +++ b/clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.h @@ -1,4 +1,4 @@ -//===--- MustCheckErrsCheck.h - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp b/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp index 3ea235b1fed7f..8737c1e5f4b05 100644 --- a/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp @@ -1,4 +1,4 @@ -//===--- HeaderGuardCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.h b/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.h index 508172c6b3a84..1eb307d8347df 100644 --- a/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.h @@ -1,4 +1,4 @@ -//===--- HeaderGuardCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.cpp b/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.cpp index 4246c8c574c50..f34e3a67c03ab 100644 --- a/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.cpp @@ -1,4 +1,4 @@ -//===--- IncludeOrderCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.h b/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.h index d992d151734ce..27c6798481866 100644 --- a/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.h @@ -1,4 +1,4 @@ -//===--- IncludeOrderCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/LLVMTidyModule.cpp b/clang-tools-extra/clang-tidy/llvm/LLVMTidyModule.cpp index c1f78caf44d16..ed65cd1720457 100644 --- a/clang-tools-extra/clang-tidy/llvm/LLVMTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/llvm/LLVMTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- LLVMTidyModule.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.cpp b/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.cpp index cb289af46ea44..f4f3543b56e5c 100644 --- a/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.cpp @@ -1,5 +1,4 @@ -//===--- PreferIsaOrDynCastInConditionalsCheck.cpp - clang-tidy -//---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.h b/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.h index 5b611096c25fd..cf4b64ad21686 100644 --- a/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.h @@ -1,4 +1,4 @@ -//===--- PreferIsaOrDynCastInConditionalsCheck.h - clang-tidy ---*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.cpp b/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.cpp index f88122360aeb1..c5ee240b64ea8 100644 --- a/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.cpp @@ -1,4 +1,4 @@ -//===--- PreferRegisterOverUnsignedCheck.cpp - clang-tidy -----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.h b/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.h index 07e018a6fc969..9a7a0c3f35857 100644 --- a/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.h @@ -1,4 +1,4 @@ -//===--- PreferRegisterOverUnsignedCheck.h - clang-tidy ---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.cpp index ea79bfaef8876..ea81c7c10b7d9 100644 --- a/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- PreferStaticOverAnonymousNamespaceCheck.cpp - clang-tidy ---------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.h b/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.h index ca0245e1d3031..a8738e6fbd70d 100644 --- a/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/PreferStaticOverAnonymousNamespaceCheck.h @@ -1,4 +1,4 @@ -//===--- PreferStaticOverAnonymousNamespaceCheck.h - clang-tidy -*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.cpp b/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.cpp index 42e53ef6025d6..b8b7c41e970bb 100644 --- a/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.cpp @@ -1,4 +1,4 @@ -//===--- TwineLocalCheck.cpp - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.h b/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.h index b4550ecb226bf..7bde04c5f11ea 100644 --- a/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.h @@ -1,4 +1,4 @@ -//===--- TwineLocalCheck.h - clang-tidy -------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.cpp b/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.cpp index 4722199364cb5..0d81b9a9e38ca 100644 --- a/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseNewMLIROpBuilderCheck.cpp - clang-tidy ------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.h b/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.h index 813a23c564782..0842699823a65 100644 --- a/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/UseNewMLIROpBuilderCheck.h @@ -1,4 +1,4 @@ -//===--- UseNewMLIROpBuilderCheck.h - clang-tidy ----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.cpp index 4afab488b7dcc..49dc92456af39 100644 --- a/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.h b/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.h index e9904e11ced36..b985288ea0e4c 100644 --- a/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.h +++ b/clang-tools-extra/clang-tidy/llvm/UseRangesCheck.h @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp index 4bc4d5a4691f0..dd1ef076c65e1 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===-- CalleeNamespaceCheck.cpp ------------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h index e718c990c4baa..34c628ea5f6e4 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h @@ -1,4 +1,4 @@ -//===-- CalleeNamespaceCheck.h ----------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp index c2fbc4422e5d2..567ade5d9a08b 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- ImplementationInNamespaceCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.h b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.h index 42da38f728bb8..da97443191b9f 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.h @@ -1,4 +1,4 @@ -//===--- ImplementationInNamespaceCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp index 4d92b1f6b8d1c..9dae57a50bb52 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp @@ -1,4 +1,4 @@ -//===-- InlineFunctionDeclCheck.cpp ---------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.h b/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.h index 52516f776ad49..01a8df46ec666 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.h +++ b/clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.h @@ -1,4 +1,4 @@ -//===-- InlineFunctionDeclCheck.h -------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp b/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp index 562d71a0891c4..ded85939b75c8 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- LLVMLibcTidyModule.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h b/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h index 83908a7875d03..50669dc073291 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h +++ b/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h @@ -1,4 +1,4 @@ -//===--- NamespaceConstants.h -----------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp index 7db648abcc882..129b8a9a30a59 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp @@ -1,4 +1,4 @@ -//===--- RestrictSystemLibcHeadersCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.h b/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.h index f7e613cec5f0e..1c7b31037875d 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.h +++ b/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.h @@ -1,4 +1,4 @@ -//===--- RestrictSystemLibcHeadersCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp index 79ae5ee98182b..2d0323ac04515 100644 --- a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp @@ -1,4 +1,4 @@ -//===--- ConfusableIdentifierCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h index 9cce6cce67682..37337954822b7 100644 --- a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h +++ b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h @@ -1,4 +1,4 @@ -//===--- ConfusableIdentifierCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/ConfusableTable/BuildConfusableTable.cpp b/clang-tools-extra/clang-tidy/misc/ConfusableTable/BuildConfusableTable.cpp index 18ac7e508165b..6a079024cfe1c 100644 --- a/clang-tools-extra/clang-tidy/misc/ConfusableTable/BuildConfusableTable.cpp +++ b/clang-tools-extra/clang-tidy/misc/ConfusableTable/BuildConfusableTable.cpp @@ -1,4 +1,4 @@ -//===--- BuildConfusableTable.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp index b32507d66cbac..b93f3d6a5a13b 100644 --- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp @@ -1,4 +1,4 @@ -//===--- ConstCorrectnessCheck.cpp - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h index 8af59b7fee294..650f35b50e189 100644 --- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h +++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h @@ -1,4 +1,4 @@ -//===--- ConstCorrectnessCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp index 360335b86c641..8ec7695aa842f 100644 --- a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp @@ -1,4 +1,4 @@ -//===--- CoroutineHostileRAII.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h index be925097692a4..95c2b04b82ea7 100644 --- a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h @@ -1,4 +1,4 @@ -//===--- CoroutineHostileRAIICheck.h - clang-tidy ----------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp index ee86925689898..714af111e7f7a 100644 --- a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp @@ -1,4 +1,4 @@ -//===--- DefinitionsInHeadersCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h index ebb55d5c0e55b..ce1293038078c 100644 --- a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h +++ b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h @@ -1,4 +1,4 @@ -//===--- DefinitionsInHeadersCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp b/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp index 1f6ceda9f5b9e..a0e7ac19ab2d5 100644 --- a/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp @@ -1,4 +1,4 @@ -//===--- HeaderIncludeCycleCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.h b/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.h index 9a1a72399f423..dcf538c4c2844 100644 --- a/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.h +++ b/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.h @@ -1,4 +1,4 @@ -//===--- HeaderIncludeCycleCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp index 813005b892ed7..1a5aa4b0758a6 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp @@ -1,4 +1,4 @@ -//===--- IncludeCleanerCheck.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h index 8f05887efb776..941a2aad79856 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h @@ -1,4 +1,4 @@ -//===--- IncludeCleanerCheck.h - clang-tidy ---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp index f675ca70deb9d..6f4af6c44dcb4 100644 --- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- MiscTidyModule.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp index d7040e28984b0..f89c539423507 100644 --- a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp +++ b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp @@ -1,4 +1,4 @@ -//===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h index 9ffb238aeee35..aa7e0432b9ceb 100644 --- a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h +++ b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h @@ -1,4 +1,4 @@ -//===--- MisleadingBidirectionalCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.cpp b/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.cpp index 1f5dc94755559..ce04fb6fa4096 100644 --- a/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.cpp +++ b/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.cpp @@ -1,4 +1,4 @@ -//===--- MisleadingIdentifier.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h b/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h index 7278b741246a3..5e1a56ddc479a 100644 --- a/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h +++ b/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h @@ -1,4 +1,4 @@ -//===--- MisleadingIdentifierCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.cpp b/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.cpp index bb64a5618620c..afa59f31d7259 100644 --- a/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisplacedConstCheck.cpp - clang-tidy------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h b/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h index b2d88d41b5e31..1abacb4c16426 100644 --- a/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h +++ b/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h @@ -1,4 +1,4 @@ -//===--- MisplacedConstCheck.h - clang-tidy----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.cpp b/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.cpp index 2837f40bc49b8..5e0f32a900ea8 100644 --- a/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.cpp @@ -1,4 +1,4 @@ -//===--- NewDeleteOverloadsCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h b/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h index b11a57aebb107..93c39fc7005cf 100644 --- a/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h +++ b/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h @@ -1,4 +1,4 @@ -//===--- NewDeleteOverloadsCheck.h - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp b/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp index 712f390765957..0d7667ce53c0c 100644 --- a/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.h b/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.h index fd82ffc6e5aba..b678137927351 100644 --- a/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.h +++ b/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.h @@ -1,4 +1,4 @@ -//===--- NoRecursionCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.cpp b/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.cpp index 6fb4a66d0bac6..b33e2667ef660 100644 --- a/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.cpp +++ b/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.cpp @@ -1,4 +1,4 @@ -//===--- NonCopyableObjects.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.h b/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.h index b886ea948476d..2fcbf41dcf5e1 100644 --- a/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.h +++ b/clang-tools-extra/clang-tidy/misc/NonCopyableObjects.h @@ -1,4 +1,4 @@ -//===--- NonCopyableObjects.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp index 9d7d9d1f865ba..fffce2095d8d5 100644 --- a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp @@ -1,4 +1,4 @@ -//===--- NonPrivateMemberVariablesInClassesCheck.cpp - clang-tidy ---------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.h b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.h index 5f0687abdd118..09077226eb5c5 100644 --- a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.h +++ b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.h @@ -1,4 +1,4 @@ -//===--- NonPrivateMemberVariablesInClassesCheck.h - clang-tidy -*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.cpp b/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.cpp index 2fe0bcf67a3d7..09c52699a27ba 100644 --- a/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.cpp @@ -1,4 +1,4 @@ -//===--- OverrideWithDifferentVisibilityCheck.cpp - clang-tidy ------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.h b/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.h index 1f5222d99196b..6e0909524991d 100644 --- a/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.h +++ b/clang-tools-extra/clang-tidy/misc/OverrideWithDifferentVisibilityCheck.h @@ -1,4 +1,4 @@ -//===--- OverrideWithDifferentVisibilityCheck.h - clang-tidy --*- C++ -*---===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp index 107eda2e98f27..17a8a50ff04ac 100644 --- a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantExpressionCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h index 7b3b84b5b32a3..784548355c164 100644 --- a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h +++ b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantExpressionCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp b/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp index 37fbd8c0d725f..5ac53005ad0fa 100644 --- a/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp @@ -1,4 +1,4 @@ -//===--- StaticAssertCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.h b/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.h index 7b378e016408a..8df3b71c5e6c1 100644 --- a/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.h +++ b/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.h @@ -1,4 +1,4 @@ -//===--- StaticAssertCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp index fd2e6b7f39a6b..92ff1c8f72fa9 100644 --- a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp @@ -1,4 +1,4 @@ -//===--- ThrowByValueCatchByReferenceCheck.cpp - clang-tidy----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h index e3cc4c5e6cd41..15c17e7fa8f65 100644 --- a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h +++ b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h @@ -1,4 +1,4 @@ -//===--- ThrowByValueCatchByReferenceCheck.h - clang-tidy--------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp index 8200239b982a0..8a85e79f5aa21 100644 --- a/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnconventionalAssignOperatorCheck.cpp - clang-tidy -----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h b/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h index c1aefaa8790e8..960c85eb89cbc 100644 --- a/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h @@ -1,4 +1,4 @@ -//===--- UnconventionalAssignOperatorCheck.h - clang-tidy -------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.cpp b/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.cpp index 0e24b47f5055f..27ddb7cb9b71c 100644 --- a/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.cpp @@ -1,4 +1,4 @@ -//===--- UniqueptrResetReleaseCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.h b/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.h index a76cc77112c6e..cb83ac7b11985 100644 --- a/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.h @@ -1,4 +1,4 @@ -//===--- UniqueptrResetReleaseCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.cpp index 4fa679aa8dd88..8e54a21b49740 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnusedAliasDeclsCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h index 9f995d94c1989..ffe82ca989d17 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h @@ -1,4 +1,4 @@ -//===--- UnusedAliasDeclsCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp index 503f62f946e81..37e289cd9e497 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnusedParametersCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h index 90097ed415d37..6e09086d667f9 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h @@ -1,4 +1,4 @@ -//===--- UnusedParametersCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp index 49432073ce1d7..31524e41f12a3 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnusedUsingDeclsCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h index e5f766dbac56b..ce77acf443e2c 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h @@ -1,4 +1,4 @@ -//===--- UnusedUsingDeclsCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.cpp index 05b470141f1f7..aa0cc1ecd5761 100644 --- a/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseAnonymousNamespaceCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.h b/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.h index 3f73ce7c398af..10e10b0c32360 100644 --- a/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UseAnonymousNamespaceCheck.h @@ -1,4 +1,4 @@ -//===--- UseAnonymousNamespaceCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp index e2071b806b125..415852d6f14e9 100644 --- a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseInternalLinkageCheck.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.h b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.h index 0d6c3e43aa945..8c82ac0b6b644 100644 --- a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.h @@ -1,4 +1,4 @@ -//===--- UseInternalLinkageCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp index aa7836bcbf169..1c0043b423361 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidBindCheck.cpp - clang-tidy-----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h index a9054864c3c11..ba9e562324e55 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidBindCheck.h - clang-tidy---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp index a5b535f7433bb..92900192957e5 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidCArraysCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.h b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.h index 719e88e4b3166..ff0809644050b 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidCArraysCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.cpp index 5d11843fea65c..6e28cb223370a 100644 --- a/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.cpp @@ -1,4 +1,4 @@ -//===--- ConcatNestedNamespacesCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.h b/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.h index a5724e169e48d..9886cb5a2d7d9 100644 --- a/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.h @@ -1,4 +1,4 @@ -//===--- ConcatNestedNamespacesCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp index 47a3ef987ebcf..9f4c215614287 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp @@ -1,4 +1,4 @@ -//===--- DeprecatedHeadersCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h index 68305efdb5d23..c9409cb641c54 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h @@ -1,4 +1,4 @@ -//===--- DeprecatedHeadersCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp b/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp index 2aca61021166d..5e254376c9796 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp @@ -1,4 +1,4 @@ -//===--- DeprecatedIosBaseAliasesCheck.cpp - clang-tidy--------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.h b/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.h index 09cfebef48d48..0a0b4deb5abba 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.h @@ -1,4 +1,4 @@ -//===--- DeprecatedIosBaseAliasesCheck.h - clang-tidy------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp b/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp index ecf3a18199ffe..05cf51a430f3f 100644 --- a/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp +++ b/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp @@ -1,4 +1,4 @@ -//===--- IntegralLiteralExpressionMatcher.cpp - clang-tidy ----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h b/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h index 22893784b07f8..d495087f49491 100644 --- a/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h +++ b/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h @@ -1,4 +1,4 @@ -//===--- IntegralLiteralExpressionMatcher.h - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index 3ce7b12f92f6b..37482583760f2 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -1,4 +1,4 @@ -//===--- LoopConvertCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h index b4f729d3ac538..55487828ca69e 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h @@ -1,4 +1,4 @@ -//===--- LoopConvertCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.cpp index 3d0a1f01725fa..286c39be44ce4 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.cpp @@ -1,4 +1,4 @@ -//===--- LoopConvertUtils.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h b/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h index ca9c1855038b5..306eca7140d1a 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h @@ -1,4 +1,4 @@ -//===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp index 118e96a6f34ae..2669aa2361ea1 100644 --- a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp @@ -1,4 +1,4 @@ -//===--- MacroToEnumCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.h b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.h index b56d7ac3b22ef..3f339f364d722 100644 --- a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.h @@ -1,4 +1,4 @@ -//===--- MacroToEnumCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.cpp index 69f7d9f69eeed..207195551883b 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.cpp @@ -1,4 +1,4 @@ -//===--- MakeSharedCheck.cpp - clang-tidy----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h b/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h index caaf4ae403c34..025ce757b3d5f 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h @@ -1,4 +1,4 @@ -//===--- MakeSharedCheck.h - clang-tidy--------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp index b3e29b3273a82..9d01e27fbab9c 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp @@ -1,4 +1,4 @@ -//===--- MakeSmartPtrCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h index e2f9abed8138a..28d5b459dd914 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h @@ -1,4 +1,4 @@ -//===--- MakeSmartPtrCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.cpp index d4d9f700f12c9..b13d95633c12e 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.cpp @@ -1,4 +1,4 @@ -//===--- MakeUniqueCheck.cpp - clang-tidy----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h b/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h index 7b356823a8cfa..9c4f6bc746392 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h @@ -1,4 +1,4 @@ -//===--- MakeUniqueCheck.h - clang-tidy--------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp index 9861f4681db1b..b5a985b0ac5d4 100644 --- a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp @@ -1,4 +1,4 @@ -//===--- MinMaxUseInitializerListCheck.cpp - clang-tidy -------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h index 577d126530761..45fc5089f7737 100644 --- a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h @@ -1,4 +1,4 @@ -//===--- MinMaxUseInitializerListCheck.h - clang-tidy -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp index fdf38bc4b6308..9b98ffdadba68 100644 --- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- ModernizeTidyModule.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp index a54d0721a5b7d..d5ccbb73735ec 100644 --- a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp @@ -1,4 +1,4 @@ -//===--- PassByValueCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h index b586b8d5fbf66..f27871c1a98b7 100644 --- a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h @@ -1,4 +1,4 @@ -//===--- PassByValueCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp index 0c9e909fea7f9..8e514e4bc9893 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp @@ -1,4 +1,4 @@ -//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h index 879255550dd5b..5af9f846db29b 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h @@ -1,4 +1,4 @@ -//===--- RawStringLiteralCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h b/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h index bda5f2c253ce9..53de74b68ff26 100644 --- a/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantVoidArgCheck.h - clang-tidy --------------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp index f2142b810a126..b562ae85aa266 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp @@ -1,4 +1,4 @@ -//===--- ReplaceAutoPtrCheck.cpp - clang-tidy------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h index c91f5f580c524..9a6e2bb0e074d 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h @@ -1,4 +1,4 @@ -//===--- ReplaceAutoPtrCheck.h - clang-tidy----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp index 42be7d7a7b78c..64b0029fc0e37 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp @@ -1,4 +1,4 @@ -//===--- ReplaceDisallowCopyAndAssignMacroCheck.cpp - clang-tidy ----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h index 71e5ecafd6a6f..44ca787fa4fcc 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h @@ -1,4 +1,4 @@ -//===--- ReplaceDisallowCopyAndAssignMacroCheck.h - clang-tidy --*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp index df20800a215da..3d7b3eae544b6 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp @@ -1,4 +1,4 @@ -//===--- ReplaceRandomShuffleCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h index 95927c2803e7b..23571dfa92175 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h @@ -1,4 +1,4 @@ -//===--- ReplaceRandomShuffleCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.cpp index 472cc34be4378..eba2445c0aaea 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.cpp @@ -1,4 +1,4 @@ -//===--- ReturnBracedInitListCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h b/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h index e9ea58b06d826..c023cb5c4c2ca 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h @@ -1,4 +1,4 @@ -//===--- ReturnBracedInitListCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.cpp index b971b82507644..e32ddbf87efe9 100644 --- a/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.cpp @@ -1,4 +1,4 @@ -//===--- ShrinkToFitCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.h b/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.h index 30c2ca25b27e5..d7070d63ca983 100644 --- a/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.h @@ -1,4 +1,4 @@ -//===--- ShrinkToFitCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp b/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp index 15bd0a6760ec1..6078013166d46 100644 --- a/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp @@ -1,4 +1,4 @@ -//===--- TypeTraitsCheck.cpp - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.h b/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.h index a08b96fd9f13e..1f9ffc9b8b811 100644 --- a/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.h @@ -1,4 +1,4 @@ -//===--- TypeTraitsCheck.h - clang-tidy -------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.cpp index d4ca652838741..4e4817f2ec2e6 100644 --- a/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnaryStaticAssertCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h b/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h index 6ec1a68a28fc1..94e78f01b06f9 100644 --- a/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h @@ -1,4 +1,4 @@ -//===--- UnaryStaticAssertCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp index aedfda83838cd..c7fd0a9695952 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseAutoCheck.cpp - clang-tidy-------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h index 7a9bbbe1cdf77..dc39077d5ac99 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h @@ -1,4 +1,4 @@ -//===--- UseAutoCheck.h - clang-tidy-----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.cpp index dfcfc925b5231..8b5ffe86b1839 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseBoolLiteralsCheck.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h b/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h index 67e6921554852..5b7b1e0cc3b6e 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h @@ -1,4 +1,4 @@ -//===--- UseBoolLiteralsCheck.h - clang-tidy---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp index c4a64be537a44..d5342a1664153 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseConstraintsCheck.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.h b/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.h index 814160190e0f4..bf49f329baeab 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.h @@ -1,4 +1,4 @@ -//===--- UseConstraintsCheck.h - clang-tidy ---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp index e950fd1c77da5..d920af7fc477b 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseDefaultMemberInitCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h index 099449a3167fa..7ae04b78006a1 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h @@ -1,4 +1,4 @@ -//===--- UseDefaultMemberInitCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp index e9e750ad4e933..cc7c2d1e1dff5 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseDesignatedInitializersCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h index 79095ade50371..e010509474287 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.h @@ -1,4 +1,4 @@ -//===--- UseDesignatedInitializersCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.cpp index ee49d8a7cb0b0..ade0085267db3 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseEmplaceCheck.cpp - clang-tidy----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h b/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h index f51e51dc734a0..2e9e142894a47 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h @@ -1,4 +1,4 @@ -//===--- UseEmplaceCheck.h - clang-tidy--------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.cpp index f0c541eaca0a0..998703bfc552d 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseEqualsDefaultCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h b/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h index 04c2177704fbe..51b386c2acaca 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h @@ -1,4 +1,4 @@ -//===--- UseEqualsDefaultCheck.h - clang-tidy---------------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp index cf4e4f09c6a90..ab2d41a52040e 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseEqualsDeleteCheck.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h index dc3e712482c21..590aa900b8768 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h @@ -1,4 +1,4 @@ -//===--- UseEqualsDeleteCheck.h - clang-tidy----------------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp index 4726674be66fd..0003429c62890 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseIntegerSignComparisonCheck.cpp - clang-tidy -------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h index 84bcba84c74b5..106796f0c8072 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h @@ -1,4 +1,4 @@ -//===--- UseIntegerSignComparisonCheck.h - clang-tidy -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.cpp index 6de80dcb99c60..d22c99335d9bb 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseNodiscardCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.h b/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.h index cbfe1089c03ca..cc46769900dd3 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.h @@ -1,4 +1,4 @@ -//===--- UseNodiscardCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp index 9ba9e6dd8d2c2..d1388dc6298e4 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseNoexceptCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h index 159aa97199534..3a915e1fe7238 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h @@ -1,4 +1,4 @@ -//===--- UseNoexceptCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp index c38fb3a01d287..4dc4baecddd50 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h index 4c02f8ccdf303..7c7b5ae02f1cd 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h @@ -1,4 +1,4 @@ -//===--- UseNullptrCheck.h - clang-tidy--------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp index fd5bd9f0b181b..6a19183737119 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseOverrideCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.h b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.h index 2c624f48fcc85..90d941362a903 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.h @@ -1,4 +1,4 @@ -//===--- UseOverrideCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp index 604204e762c78..2e2f25fbb3f58 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h index 51327dab53e3d..80ea6996afe55 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp index 4041c81526d2f..aa1ee6db8917a 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseScopedLockCheck.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.h b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.h index a5697805c15ca..553031857e086 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.h @@ -1,4 +1,4 @@ -//===--- UseScopedLockCheck.h - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp index 2af67f7ccb4c1..eebd609cc84a8 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseStartsEndsWithCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h index 17c2999bda84c..70df8b87cb6f4 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h @@ -1,4 +1,4 @@ -//===--- UseStartsEndsWithCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp index 081ec305f3b2a..c95834faab7fc 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseStdFormatCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.h index 9ac2240212ebf..e369c17a0f733 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.h @@ -1,4 +1,4 @@ -//===--- UseStdFormatCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp index 934cc24817d73..a04f78c271d42 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseStdNumbersCheck.cpp - clang_tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.h index 05fc5ada14b87..f1bd3b4eee2ba 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.h @@ -1,4 +1,4 @@ -//===--- UseStdNumbersCheck.h - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp index b1e3ee6e2ba85..99ade046305c1 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseStdPrintCheck.cpp - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h index 995c740389e73..1f7660991a275 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h @@ -1,4 +1,4 @@ -//===--- UseStdPrintCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp index 82f64096cbec1..3e27d8fa1fe42 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseTrailingReturnTypeCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h index 91369919c5d36..9050bd5eba5e2 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h @@ -1,4 +1,4 @@ -//===--- UseTrailingReturnTypeCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.cpp index 2373a26fe48b4..03ecec9bd175b 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseTransparentFunctorsCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h b/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h index 80f022159c67b..dc9c76e8875a0 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h @@ -1,4 +1,4 @@ -//===--- UseTransparentFunctorsCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.cpp index 1e0a0a551339a..eef9d39800360 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseUncaughtExceptionsCheck.cpp - clang-tidy--------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h b/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h index 48677521181f9..4c63efe0c6919 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h @@ -1,4 +1,4 @@ -//===--- UseUncaughtExceptionsCheck.h - clang-tidy------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp index 4037e8c1ea2fd..72673753e6c60 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseUsingCheck.cpp - clang-tidy------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h index 1e54bbf23c984..4ab1c4f6b9646 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h @@ -1,4 +1,4 @@ -//===--- UseUsingCheck.h - clang-tidy----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.cpp b/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.cpp index a144296c47b8c..00082c7034306 100644 --- a/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.cpp +++ b/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.cpp @@ -1,4 +1,4 @@ -//===--- BufferDerefCheck.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h b/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h index 69e7aa092ebbf..7922750c135ac 100644 --- a/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h +++ b/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h @@ -1,4 +1,4 @@ -//===--- BufferDerefCheck.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/mpi/MPITidyModule.cpp b/clang-tools-extra/clang-tidy/mpi/MPITidyModule.cpp index 67ae101c18cb1..f56cb29455007 100644 --- a/clang-tools-extra/clang-tidy/mpi/MPITidyModule.cpp +++ b/clang-tools-extra/clang-tidy/mpi/MPITidyModule.cpp @@ -1,4 +1,4 @@ -//===--- MPITidyModule.cpp - clang-tidy -----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.cpp b/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.cpp index 5abe4f77d6598..17c1283b4d414 100644 --- a/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.cpp +++ b/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.cpp @@ -1,4 +1,4 @@ -//===--- TypeMismatchCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h b/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h index 480684b0fac53..60bcb0f3cf70c 100644 --- a/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h +++ b/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h @@ -1,4 +1,4 @@ -//===--- TypeMismatchCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/objc/AssertEquals.cpp b/clang-tools-extra/clang-tidy/objc/AssertEquals.cpp index 3d9b9fa401910..3f1bc17926ba2 100644 --- a/clang-tools-extra/clang-tidy/objc/AssertEquals.cpp +++ b/clang-tools-extra/clang-tidy/objc/AssertEquals.cpp @@ -1,4 +1,4 @@ -//===--- AssertEquals.cpp - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/AssertEquals.h b/clang-tools-extra/clang-tidy/objc/AssertEquals.h index 59b55273aa118..0f4e303feea8b 100644 --- a/clang-tools-extra/clang-tidy/objc/AssertEquals.h +++ b/clang-tools-extra/clang-tidy/objc/AssertEquals.h @@ -1,4 +1,4 @@ -//===--- AssertEquals.h - clang-tidy ----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.cpp b/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.cpp index 0de9584ad4806..650b67e77eeed 100644 --- a/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidNSErrorInitCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.h b/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.h index b343cb0c4fa80..2fd3d11559a39 100644 --- a/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.h +++ b/clang-tools-extra/clang-tidy/objc/AvoidNSErrorInitCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidNSErrorInitCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.cpp b/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.cpp index d18815358b837..3a3307e0ff18f 100644 --- a/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.cpp @@ -1,4 +1,4 @@ -//===--- DeallocInCategoryCheck.cpp - clang-tidy -------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.h b/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.h index aa34d5bff9665..f44a123055eee 100644 --- a/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.h +++ b/clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.h @@ -1,4 +1,4 @@ -//===--- DeallocInCategoryCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.cpp b/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.cpp index 089538d4c65a8..16c9e9b8b4a99 100644 --- a/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.cpp @@ -1,4 +1,4 @@ -//===--- ForbiddenSubclassingCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h b/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h index 3410868ed42a8..1f345c1da5156 100644 --- a/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h +++ b/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h @@ -1,4 +1,4 @@ -//===--- ForbiddenSubclassingCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/MissingHashCheck.cpp b/clang-tools-extra/clang-tidy/objc/MissingHashCheck.cpp index 42f383edc67ed..7b48fd9f77bca 100644 --- a/clang-tools-extra/clang-tidy/objc/MissingHashCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/MissingHashCheck.cpp @@ -1,4 +1,4 @@ -//===--- MissingHashCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/objc/MissingHashCheck.h b/clang-tools-extra/clang-tidy/objc/MissingHashCheck.h index fbb08dc249e60..cf0261e3cc38a 100644 --- a/clang-tools-extra/clang-tidy/objc/MissingHashCheck.h +++ b/clang-tools-extra/clang-tidy/objc/MissingHashCheck.h @@ -1,4 +1,4 @@ -//===--- MissingHashCheck.h - clang-tidy ------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp b/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp index 79e9d97d9594b..6a9adfe7d282d 100644 --- a/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp @@ -1,4 +1,4 @@ -//===--- NSDateFormatterCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.h b/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.h index 48eb4eda192e0..dc0e89a08b680 100644 --- a/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.h +++ b/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.h @@ -1,4 +1,4 @@ -//===--- NSDateFormatterCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp b/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp index 8e4ed41c5f501..8a32c38a04695 100644 --- a/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp @@ -1,4 +1,4 @@ -//===--- NSInvocationArgumentLifetimeCheck.cpp - clang-tidy ---------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.h b/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.h index ebb432f5fe32a..d09ea8cc10298 100644 --- a/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.h +++ b/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.h @@ -1,4 +1,4 @@ -//===--- NSInvocationArgumentLifetimeCheck.h - clang-tidy -------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/ObjCTidyModule.cpp b/clang-tools-extra/clang-tidy/objc/ObjCTidyModule.cpp index 56ccf33a6362a..c21b459964692 100644 --- a/clang-tools-extra/clang-tidy/objc/ObjCTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/objc/ObjCTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- ObjCTidyModule.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp index 01ee4d518b97c..f2bc6f10b9c58 100644 --- a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp @@ -1,4 +1,4 @@ -//===--- PropertyDeclarationCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h index 9950f92b8a359..c883e59321124 100644 --- a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h +++ b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h @@ -1,4 +1,4 @@ -//===--- PropertyDeclarationCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.cpp b/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.cpp index 951cbc52c9a99..3c133ad7dd96b 100644 --- a/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuperSelfCheck.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.h b/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.h index ec852e84397a8..baeba560a8fef 100644 --- a/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.h +++ b/clang-tools-extra/clang-tidy/objc/SuperSelfCheck.h @@ -1,4 +1,4 @@ -//===--- SuperSelfCheck.h - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp index 42fb95bf10527..f9becee92e148 100644 --- a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp +++ b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp @@ -1,4 +1,4 @@ -//===--- ExceptionEscapeCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h index 3590c0eacee7f..1703f55f902ba 100644 --- a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h +++ b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h @@ -1,4 +1,4 @@ -//===--- ExceptionEscapeCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/openmp/OpenMPTidyModule.cpp b/clang-tools-extra/clang-tidy/openmp/OpenMPTidyModule.cpp index d9c9d90673408..b48fce670a041 100644 --- a/clang-tools-extra/clang-tidy/openmp/OpenMPTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/openmp/OpenMPTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- OpenMPTidyModule.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.cpp b/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.cpp index e1c353fbe65e0..d02ab728547ae 100644 --- a/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.cpp +++ b/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseDefaultNoneCheck.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.h b/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.h index 3b74f9ad78aee..fb6b528df3ffb 100644 --- a/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.h +++ b/clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.h @@ -1,4 +1,4 @@ -//===--- UseDefaultNoneCheck.h - clang-tidy ---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.cpp b/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.cpp index a394f5c6efa2a..747994c9a3c7f 100644 --- a/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidEndlCheck.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.h b/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.h index db75fbcf4e89f..860d832b807d0 100644 --- a/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.h +++ b/clang-tools-extra/clang-tidy/performance/AvoidEndlCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidEndlCheck.h - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.cpp b/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.cpp index 0f3e9d3ef7591..edd3ded2e2858 100644 --- a/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.cpp @@ -1,4 +1,4 @@ -//===--- EnumSizeCheck.cpp - clang-tidy -----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.h b/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.h index 4d797602ede8b..d87e6b8ab9f5e 100644 --- a/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.h +++ b/clang-tools-extra/clang-tidy/performance/EnumSizeCheck.h @@ -1,4 +1,4 @@ -//===--- EnumSizeCheck.h - clang-tidy ---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp index 40ea915a33299..d26480fc9f60d 100644 --- a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp @@ -1,4 +1,4 @@ -//===--- FasterStringFindCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h index 83af95cd69549..a7ab79a3809d4 100644 --- a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h +++ b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h @@ -1,4 +1,4 @@ -//===--- FasterStringFindCheck.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.cpp b/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.cpp index f545a49dc184b..d0b399739bb48 100644 --- a/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.cpp @@ -1,4 +1,4 @@ -//===--- ForRangeCopyCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.h b/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.h index 8fabbfa2ae7ba..3ed05fecd015d 100644 --- a/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.h +++ b/clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.h @@ -1,4 +1,4 @@ -//===--- ForRangeCopyCheck.h - clang-tidy------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp index 1ecf1e14957a1..a558954b3fe1d 100644 --- a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp @@ -1,4 +1,4 @@ -//===--- ImplicitConversionInLoopCheck.cpp - clang-tidy--------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h index d1764070bd4d7..786081a351070 100644 --- a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h +++ b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h @@ -1,4 +1,4 @@ -//===--- ImplicitConversionInLoopCheck.h - clang-tidy------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.cpp b/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.cpp index ad900fcec2dee..cd128c3556725 100644 --- a/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.cpp @@ -1,4 +1,4 @@ -//===--- InefficientAlgorithmCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.h b/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.h index 5ab0513ea8f94..be8001a15667c 100644 --- a/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.h +++ b/clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.h @@ -1,4 +1,4 @@ -//===--- InefficientAlgorithmCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.cpp b/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.cpp index a3f412d9e3415..92e3220fdb817 100644 --- a/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.cpp @@ -1,4 +1,4 @@ -//===--- InefficientStringConcatenationCheck.cpp - clang-tidy--------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h b/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h index 1c15f0eb49ac7..810c0109574e9 100644 --- a/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h +++ b/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h @@ -1,5 +1,4 @@ -//===--- InefficientStringConcatenationCheck.h - clang-tidy-----------*- C++ -//-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp index d87e352b00073..3da1469a9f120 100644 --- a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp @@ -1,4 +1,4 @@ -//===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h index 4f45ff490633a..9737d9d5ecb1a 100644 --- a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h +++ b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h @@ -1,4 +1,4 @@ -//===--- InefficientVectorOperationCheck.h - clang-tidy----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp index f458e26d964b0..854f09aeb0b51 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp @@ -1,4 +1,4 @@ -//===--- MoveConstArgCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h index 1c2c430d162c7..9f67f64857168 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h +++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h @@ -1,4 +1,4 @@ -//===--- MoveConstArgCheck.h - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp index 1585a0ae36f9c..44f6d20ac2be3 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- MoveConstructorInitCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.h b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.h index 9c1d20710f51e..7c5aec8c59fc8 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.h +++ b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.h @@ -1,4 +1,4 @@ -//===--- MoveConstructorInitCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.cpp b/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.cpp index 1c018999432e3..2469da978d0ae 100644 --- a/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoAutomaticMoveCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.h b/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.h index aed230d2f1c40..af80e74f3a5b4 100644 --- a/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.h +++ b/clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.h @@ -1,4 +1,4 @@ -//===--- NoAutomaticMoveCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.cpp b/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.cpp index bf212595aceaf..115835ad3983e 100644 --- a/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoIntToPtrCheck.cpp - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.h b/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.h index 322838da115cc..ed6f60f697da4 100644 --- a/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.h +++ b/clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.h @@ -1,4 +1,4 @@ -//===--- NoIntToPtrCheck.h - clang-tidy -------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.cpp b/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.cpp index 4aa999ea5c0b7..dc293facb2ae1 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoexceptDestructorCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.h b/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.h index ab3850f0970a8..ce2b1c9c17a19 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.h +++ b/clang-tools-extra/clang-tidy/performance/NoexceptDestructorCheck.h @@ -1,4 +1,4 @@ -//===--- NoexceptDestructorCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.cpp b/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.cpp index 911cd1b533367..895bd702d3834 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoexceptFunctionCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h b/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h index 4775219d7e439..075b4fe964d89 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h +++ b/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h @@ -1,4 +1,4 @@ -//===--- NoexceptFunctionCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.cpp b/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.cpp index a77ca6aebb378..75bf8aa8734d5 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoexceptMoveConstructorCheck.cpp - clang-tidy---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.h b/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.h index 51728d2ce0d8d..11a8068aebbc4 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.h +++ b/clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.h @@ -1,4 +1,4 @@ -//===--- NoexceptMoveConstructorCheck.h - clang-tidy-------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.cpp b/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.cpp index e7cba6e54e86a..29faf9f2d476c 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.cpp @@ -1,4 +1,4 @@ -//===--- NoexceptSwapCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.h b/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.h index 0330de4a50b43..9466b3a127302 100644 --- a/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.h +++ b/clang-tools-extra/clang-tidy/performance/NoexceptSwapCheck.h @@ -1,4 +1,4 @@ -//===--- NoexceptSwapCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp b/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp index 10ad9ec6fef4c..ae15208ae3dc5 100644 --- a/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp @@ -1,4 +1,4 @@ -//===-- PerformanceTidyModule.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.cpp b/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.cpp index adfedb4e84c47..0db66c0d5803d 100644 --- a/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.cpp @@ -1,4 +1,4 @@ -//===--- TriviallyDestructibleCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.h b/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.h index 305844715726a..ae96359a544ce 100644 --- a/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.h +++ b/clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.h @@ -1,4 +1,4 @@ -//===--- TriviallyDestructibleCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp index 29f9146e47786..096ca57ee8e22 100644 --- a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp @@ -1,4 +1,4 @@ -//===--- TypePromotionInMathFnCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h index 08a7eea580221..9d9b073c80400 100644 --- a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h +++ b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h @@ -1,4 +1,4 @@ -//===--- TypePromotionInMathFnCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp index c413090b3a0a4..591836667a2ba 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp @@ -1,4 +1,4 @@ -//===--- UnnecessaryCopyInitialization.cpp - clang-tidy--------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h index 38f756f9b452f..66231889b8014 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h @@ -1,4 +1,4 @@ -//===--- UnnecessaryCopyInitialization.h - clang-tidy------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp index c1aa52bacf99f..3f5b43feca1ad 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp @@ -1,4 +1,4 @@ -//===--- UnnecessaryValueParamCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h index b52043416e769..571857020cef4 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h @@ -1,4 +1,4 @@ -//===--- UnnecessaryValueParamCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.cpp b/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.cpp index d9569d0b5c603..a946ebf1650fc 100644 --- a/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.cpp +++ b/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidPragmaOnceCheck.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.h b/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.h index 203fdfd4bd33a..3638a9c46773e 100644 --- a/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.h +++ b/clang-tools-extra/clang-tidy/portability/AvoidPragmaOnceCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidPragmaOnceCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp index 98853556588b3..e73e95455d3a5 100644 --- a/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- PortabilityTidyModule.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.cpp b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.cpp index db5693e3b7cb7..5174f56207b54 100644 --- a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.cpp +++ b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.cpp @@ -1,4 +1,4 @@ -//===--- RestrictSystemIncludesCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h index 60fae5e73a602..5347ae9d68b02 100644 --- a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h +++ b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h @@ -1,4 +1,4 @@ -//===--- RestrictSystemIncludesCheck.h - clang-tidy --------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.cpp b/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.cpp index f4bd4c3d5657d..d90b09abb1be8 100644 --- a/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.cpp +++ b/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.cpp @@ -1,4 +1,4 @@ -//===--- SIMDIntrinsicsCheck.cpp - clang-tidy------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h b/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h index 92fc0af98a25b..ab0711335c920 100644 --- a/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h +++ b/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h @@ -1,4 +1,4 @@ -//===--- SIMDIntrinsicsCheck.h - clang-tidy----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.cpp b/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.cpp index 5a3c9a4203eb9..ff58505e8f87c 100644 --- a/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.cpp +++ b/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.cpp @@ -1,4 +1,4 @@ -//===-- StdAllocatorConstCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.h b/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.h index 87702af91bdb6..b2f5feac21918 100644 --- a/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.h +++ b/clang-tools-extra/clang-tidy/portability/StdAllocatorConstCheck.h @@ -1,4 +1,4 @@ -//===--- StdAllocatorConstT.h - clang-tidy -----------------------*- C++-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.cpp b/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.cpp index aaa23367a3825..bf3173dc993e2 100644 --- a/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.cpp +++ b/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.cpp @@ -1,4 +1,4 @@ -//===--- TemplateVirtualMemberFunctionCheck.cpp - clang-tidy --------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.h b/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.h index 41f92adadd6e8..01d5519d7e6fd 100644 --- a/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.h +++ b/clang-tools-extra/clang-tidy/portability/TemplateVirtualMemberFunctionCheck.h @@ -1,4 +1,4 @@ -//===--- TemplateVirtualMemberFunctionCheck.h - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.cpp b/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.cpp index 5f36c3976fc69..22ff5ce1545a5 100644 --- a/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.cpp @@ -1,4 +1,4 @@ -//===--- AmbiguousSmartptrResetCallCheck.cpp - clang-tidy -----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.h b/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.h index 05932e59e7928..763cd7f01f9c3 100644 --- a/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.h +++ b/clang-tools-extra/clang-tidy/readability/AmbiguousSmartptrResetCallCheck.h @@ -1,4 +1,4 @@ -//===--- AmbiguousSmartptrResetCallCheck.h - clang-tidy ---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.cpp b/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.cpp index 24cbbd8bc60a2..554996730c2be 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.cpp +++ b/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.cpp @@ -1,4 +1,4 @@ -//===--- AvoidConstParamsInDecls.cpp - clang-tidy--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h b/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h index 1fc57779111df..1dd28fde217ed 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h +++ b/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h @@ -1,4 +1,4 @@ -//===--- AvoidConstParamsInDecls.h - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.cpp b/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.cpp index 1b62f54d5557d..35e5462b55cce 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidNestedConditionalOperatorCheck.cpp - clang-tidy -------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h b/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h index 9010156de6ce2..b14af6a0cf1c7 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h +++ b/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidNestedConditionalOperatorCheck.h - clang-tidy -----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp b/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp index d283111a4de1a..40a4fa114681e 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidReturnWithVoidValueCheck.cpp - clang-tidy -------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.h b/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.h index f8148db43cd95..93e6268fd5dd5 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.h +++ b/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidReturnWithVoidValueCheck.h - clang-tidy -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.cpp b/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.cpp index ca5fc358ce290..c53c70667dbbc 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.cpp @@ -1,4 +1,4 @@ -//===--- AvoidUnconditionalPreprocessorIfCheck.cpp - clang-tidy -----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.h b/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.h index 50292fce9d8dc..2382a5e928972 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.h +++ b/clang-tools-extra/clang-tidy/readability/AvoidUnconditionalPreprocessorIfCheck.h @@ -1,4 +1,4 @@ -//===--- AvoidUnconditionalPreprocessorIfCheck.h - clang-tidy ---*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp index 85bd9c1e4f9a0..1952e14d1fc3d 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp @@ -1,4 +1,4 @@ -//===--- BracesAroundStatementsCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h index 4cd37a7b2dd6c..183f1fa8b8a8e 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h @@ -1,4 +1,4 @@ -//===--- BracesAroundStatementsCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp index c13a8010c2221..6ccd933ff4c21 100644 --- a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp @@ -1,4 +1,4 @@ -//===--- ConstReturnTypeCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h index a36c6f4b67e5a..e3d9713d430ce 100644 --- a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h @@ -1,4 +1,4 @@ -//===--- ConstReturnTypeCheck.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp index fb68c7d334b7f..04c1aa2fab8e6 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp @@ -1,4 +1,4 @@ -//===--- ContainerContainsCheck.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h index 753603ed82537..e419785060df0 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h @@ -1,4 +1,4 @@ -//===--- ContainerContainsCheck.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp index a05e228520c9e..11756d10a8221 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp @@ -1,4 +1,4 @@ -//===--- ContainerDataPointerCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h index 2a15b95095171..71fde87fbb093 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h @@ -1,4 +1,4 @@ -//===--- ContainerDataPointerCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp index c3f8106c34dcb..11faf1622e4e8 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp @@ -1,4 +1,4 @@ -//===--- ContainerSizeEmptyCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.h index e449686f77566..35ef18430378b 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.h @@ -1,4 +1,4 @@ -//===--- ContainerSizeEmptyCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp index d6784d0e8fba8..6da4cf7c6bf94 100644 --- a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp +++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp @@ -1,4 +1,4 @@ -//===--- ConvertMemberFunctionsToStatic.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h index 1b12fec972998..ee83d7b4784ff 100644 --- a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h +++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h @@ -1,4 +1,4 @@ -//===--- ConvertMemberFunctionsToStatic.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp index 12131cc078f0b..e96bfe7fe7271 100644 --- a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp @@ -1,4 +1,4 @@ -//===--- DeleteNullPointerCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h index 6e746d803d3ee..dc88646f07afa 100644 --- a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h +++ b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h @@ -1,4 +1,4 @@ -//===--- DeleteNullPointerCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp index 229e5583846b9..570a109e55b14 100644 --- a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp @@ -1,4 +1,4 @@ -//===--- DuplicateIncludeCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h index 05395496d841b..297999cf4f921 100644 --- a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h +++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h @@ -1,4 +1,4 @@ -//===--- DuplicateIncludeCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.cpp b/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.cpp index f68e1f6926b84..6399e7d99a9c7 100644 --- a/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.cpp @@ -1,4 +1,4 @@ -//===--- ElseAfterReturnCheck.cpp - clang-tidy-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.h b/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.h index 34860c2853ea8..ab025032317c7 100644 --- a/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.h @@ -1,4 +1,4 @@ -//===--- ElseAfterReturnCheck.h - clang-tidy---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp index 9eef5c4db2d01..a2a5c3e10ee07 100644 --- a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp @@ -1,4 +1,4 @@ -//===--- EnumInitialValueCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h index 66087e4ee170d..f070f867b6af8 100644 --- a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h +++ b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h @@ -1,4 +1,4 @@ -//===--- EnumInitialValueCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp index 2f59aaa86b157..f9d81212e2842 100644 --- a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp @@ -1,4 +1,4 @@ -//===--- FunctionCognitiveComplexityCheck.cpp - clang-tidy ------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h index bdb8550eeae23..455fbfd9fa56a 100644 --- a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h +++ b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h @@ -1,4 +1,4 @@ -//===--- FunctionCognitiveComplexityCheck.h - clang-tidy --------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp index 8e3a2e306dbf7..8c58346ede3fa 100644 --- a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp @@ -1,4 +1,4 @@ -//===-- FunctionSizeCheck.cpp - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.h b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.h index f668ab18fea52..0459db6abfe31 100644 --- a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.h +++ b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.h @@ -1,4 +1,4 @@ -//===--- FunctionSizeCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.cpp index 50f8a6be06e46..877f0a45f9ea7 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.cpp @@ -1,5 +1,4 @@ -//===--- IdentifierLengthCheck.cpp - clang-tidy -//-----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.h index 2a4b810264e96..9626e2251426d 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.h @@ -1,5 +1,4 @@ -//===--- IdentifierLengthCheck.h - clang-tidy ---------------------*- C++ -//-*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index c8b62211c4b2e..af85f251da14b 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -1,4 +1,4 @@ -//===--- IdentifierNamingCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h index 646ec0eac8dd1..3db9d23150af3 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h @@ -1,4 +1,4 @@ -//===--- IdentifierNamingCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp index 6b10e6b206a31..3fb856097a7e9 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp @@ -1,4 +1,4 @@ -//===--- ImplicitBoolConversionCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h index 5947f7316e67c..8028a31719644 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h @@ -1,4 +1,4 @@ -//===--- ImplicitBoolConversionCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp index 10aa779117bbd..2eb26fcf840cd 100644 --- a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp @@ -1,4 +1,4 @@ -//===--- InconsistentDeclarationParameterNameCheck.cpp - clang-tidy-------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp index ca6503753f6b4..bc5edecb8a65b 100644 --- a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp @@ -1,4 +1,4 @@ -//===--- IsolateDeclarationCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h index 63e37a48ca418..c7e1ea33a0d0d 100644 --- a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h @@ -1,4 +1,4 @@ -//===--- IsolateDeclarationCheck.h - clang-tidy -----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp b/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp index 6f91527c420e8..a38f7bc029e8b 100644 --- a/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp @@ -1,4 +1,4 @@ -//===--- MagicNumbersCheck.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.h b/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.h index 70a17889d244e..b703bd4ba984f 100644 --- a/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.h +++ b/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.h @@ -1,4 +1,4 @@ -//===--- MagicNumbersCheck.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp b/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp index aace96f54c61c..bea68884e3bda 100644 --- a/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp @@ -1,4 +1,4 @@ -//===--- MakeMemberFunctionConstCheck.cpp - clang-tidy --------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.h b/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.h index dc8d98332793e..6be832260bd18 100644 --- a/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.h +++ b/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.h @@ -1,4 +1,4 @@ -//===--- MakeMemberFunctionConstCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp index e0640f27f4e35..e15b2ecd8f5c0 100644 --- a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp @@ -1,4 +1,4 @@ -//===--- MathMissingParenthesesCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h index 9a9d2b3cfaaba..3381d6612a709 100644 --- a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h +++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h @@ -1,4 +1,4 @@ -//===--- MathMissingParenthesesCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp index e32f79589a059..0765d8d82ee04 100644 --- a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisleadingIndentationCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h index 9c92fc1e18b6f..39bb4baba5141 100644 --- a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h +++ b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h @@ -1,4 +1,4 @@ -//===--- MisleadingIndentationCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.cpp b/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.cpp index 328d1896ce9f8..0052af6f5d1d1 100644 --- a/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.cpp @@ -1,4 +1,4 @@ -//===--- MisplacedArrayIndexCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h b/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h index 1ccd011b30fff..1b11b6bea108e 100644 --- a/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h +++ b/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h @@ -1,4 +1,4 @@ -//===--- MisplacedArrayIndexCheck.h - clang-tidy-----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp index 6bb8c394f75cc..7251d63edfd89 100644 --- a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp @@ -1,4 +1,4 @@ -//===--- NamedParameterCheck.cpp - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h index f14a74d75eb49..ecd128d887f84 100644 --- a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h +++ b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h @@ -1,4 +1,4 @@ -//===--- NamedParameterCheck.h - clang-tidy ---------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp index c04bf361c40ca..744d23a6fdbcd 100644 --- a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp @@ -1,4 +1,4 @@ -//===--- NamespaceCommentCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h index 8edd77213f779..883a2a44fee8d 100644 --- a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h +++ b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h @@ -1,4 +1,4 @@ -//===--- NamespaceCommentCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp index 07071a1f6d2fe..29fff3971599e 100644 --- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp @@ -1,4 +1,4 @@ -//===--- NonConstParameterCheck.cpp - clang-tidy---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h index e2598dd01d297..61d6ebd4c2f2a 100644 --- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h +++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h @@ -1,4 +1,4 @@ -//===--- NonConstParameterCheck.h - clang-tidy-------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.cpp b/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.cpp index ccaa686f85323..196fb31bd4b7a 100644 --- a/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.cpp @@ -1,5 +1,4 @@ -//===--- OperatorsRepresentationCheck.cpp - clang-tidy -//--------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.h b/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.h index d315f3912a914..f1a9793481ada 100644 --- a/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.h +++ b/clang-tools-extra/clang-tidy/readability/OperatorsRepresentationCheck.h @@ -1,4 +1,4 @@ -//===--- OperatorsRepresentationCheck.h - clang-tidy ------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp index 44a784bc9f21a..dc9510d1dab62 100644 --- a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp @@ -1,4 +1,4 @@ -//===--- QualifiedAutoCheck.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.h b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.h index b5b713f3db6cf..c63b426bda7c8 100644 --- a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.h +++ b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.h @@ -1,4 +1,4 @@ -//===--- QualifiedAutoCheck.h - clang-tidy ----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index 12f8cdb289dd2..d01882dfc9daa 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- ReadabilityTidyModule.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.cpp index c3464b2a83d15..e93aa16ebdb13 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantAccessSpecifiersCheck.cpp - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.h index 566e5ea637986..6359dafc0e4eb 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantAccessSpecifiersCheck.h - clang-tidy ----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.cpp index acc834ae25c60..1ee75220b1c4e 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantCastingCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.h index fdcfede05d436..97c87fb8b09a1 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantCastingCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantCastingCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.cpp index d93077cc6884e..b3b84e2cc0ccd 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantControlFlowCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h index 7433005bb7a37..7698996d107e4 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantControlFlowCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.cpp index e86e866209e9a..cf6e92d84e92a 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantDeclarationCheck.cpp - clang-tidy------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h index a14a8aa70f6cf..fff7827c6378a 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantDeclarationCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.cpp index a70719fd8a041..7f399997cfecf 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantFunctionPtrDereferenceCheck.cpp - clang-tidy-------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h index a04e9c165bc03..f4a3671b0f7d1 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantFunctionPtrDereferenceCheck.h - clang-tidy-----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp index 7f1882c775c59..2053b89ada7e2 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantInlineSpecifierCheck.cpp - clang-tidy--------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.h index 63b1b46bb7e09..d1134b307a909 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantInlineSpecifierCheck.h - clang-tidy ------------*-C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp index 2373dde1618bc..1bbb9c86fee14 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantMemberInitCheck.cpp - clang-tidy-------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h index c0e0a6dac0dbc..2ce8c3f5f64f5 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantMemberInitCheck.h - clang-tidy----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.cpp index 513687f03df0c..931126a154d1e 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantPreprocessorCheck.cpp - clang-tidy ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.h index 8a6fb6fd98b33..ca34f9783c619 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantPreprocessorCheck.h - clang-tidy --------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.cpp index 9774d93ff36fd..0598683bff6c2 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.cpp @@ -1,4 +1,4 @@ -//===--- RedundantSmartptrGetCheck.cpp - clang-tidy -----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.h index 1f90e4fb4a8be..be9e916cc86be 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantSmartptrGetCheck.h - clang-tidy ---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/RedundantStringCStrCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantStringCStrCheck.h index e2e6ab1fd939c..ac82778853747 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantStringCStrCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantStringCStrCheck.h @@ -1,4 +1,4 @@ -//===--- RedundantStringCStrCheck.h - clang-tidy ----------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.cpp b/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.cpp index 587ae8ea30580..5d3fd14b92471 100644 --- a/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.cpp @@ -1,5 +1,4 @@ -//===--- ReferenceToConstructedTemporaryCheck.cpp - clang-tidy -//--------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.h b/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.h index c1f4f1c4d47dd..c95f65a3ec691 100644 --- a/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ReferenceToConstructedTemporaryCheck.h @@ -1,4 +1,4 @@ -//===--- ReferenceToConstructedTemporaryCheck.h - clang-tidy ----*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp index 499c88ef5d4e4..4184c295b5f0a 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -1,4 +1,4 @@ -//===-- SimplifyBooleanExprCheck.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h index 2ea6968798408..2ab074e5dca69 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h @@ -1,4 +1,4 @@ -//===--- SimplifyBooleanExpr.h clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.cpp index 7d4698d27ed16..591ee1fbe067c 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.cpp @@ -1,4 +1,4 @@ -//===--- SimplifySubscriptExprCheck.cpp - clang-tidy-----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.h b/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.h index deffb09f5db28..79ced95fd762c 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.h +++ b/clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.h @@ -1,4 +1,4 @@ -//===--- SimplifySubscriptExprCheck.h - clang-tidy---------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.cpp b/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.cpp index a7b3c4a1f7cf9..7ef8ef3d947f3 100644 --- a/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.cpp @@ -1,4 +1,4 @@ -//===--- StaticAccessedThroughInstanceCheck.cpp - clang-tidy---------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h b/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h index 9869855c17d6b..5b47bf7685bbf 100644 --- a/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h +++ b/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h @@ -1,4 +1,4 @@ -//===--- StaticAccessedThroughInstanceCheck.h - clang-tidy-------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp index a0f59dbcb4890..e9a2eae11bfde 100644 --- a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp @@ -1,4 +1,4 @@ -//===--- StaticDefinitionInAnonymousNamespaceCheck.cpp - clang-tidy--------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h index 620cd6e3f2f87..9207ba0075b5d 100644 --- a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h +++ b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h @@ -1,4 +1,4 @@ -//===--- StaticDefinitionInAnonymousNamespaceCheck.h - clang-tidy*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/StringCompareCheck.cpp b/clang-tools-extra/clang-tidy/readability/StringCompareCheck.cpp index 7c0bbef3ca087..229b5159d53d1 100644 --- a/clang-tools-extra/clang-tidy/readability/StringCompareCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/StringCompareCheck.cpp @@ -1,4 +1,4 @@ -//===-- StringCompareCheck.cpp - clang-tidy--------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/StringCompareCheck.h b/clang-tools-extra/clang-tidy/readability/StringCompareCheck.h index 150090901a6e9..9ff80b075f101 100644 --- a/clang-tools-extra/clang-tidy/readability/StringCompareCheck.h +++ b/clang-tools-extra/clang-tidy/readability/StringCompareCheck.h @@ -1,4 +1,4 @@ -//===--- StringCompareCheck.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp index a80637dee18f4..ad8b47aa96425 100644 --- a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp @@ -1,4 +1,4 @@ -//===--- SuspiciousCallArgumentCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.h b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.h index 38477d0800f15..43ae0f181302f 100644 --- a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.h +++ b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.h @@ -1,4 +1,4 @@ -//===--- SuspiciousCallArgumentCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.cpp b/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.cpp index 462085b023179..c9d70419af24b 100644 --- a/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.cpp @@ -1,4 +1,4 @@ -//===--- UniqueptrDeleteReleaseCheck.cpp - clang-tidy----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h b/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h index 2768955109d26..f7d6fe70058fc 100644 --- a/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h +++ b/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h @@ -1,4 +1,4 @@ -//===--- UniqueptrDeleteReleaseCheck.h - clang-tidy--------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp index dac4cb556aa75..c1dc209fd079d 100644 --- a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp @@ -1,4 +1,4 @@ -//===--- UppercaseLiteralSuffixCheck.cpp - clang-tidy ---------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.h b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.h index a8af08f5a8021..7c71fe064f3c9 100644 --- a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.h +++ b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.h @@ -1,4 +1,4 @@ -//===--- UppercaseLiteralSuffixCheck.h - clang-tidy -------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.cpp b/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.cpp index 7cf0e0853f080..82eb6de8fa3dc 100644 --- a/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseAnyOfAllOfCheck.cpp - clang-tidy-------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h b/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h index 4e53b3f5a8a91..4b7ffc1f36ace 100644 --- a/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h +++ b/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h @@ -1,4 +1,4 @@ -//===--- UseAnyOfAllOfCheck.h - clang-tidy-----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp b/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp index 05c0088e6b41b..40aaff4cb3893 100644 --- a/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseConcisePreprocessorDirectivesCheck.cpp - clang-tidy -----------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.h b/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.h index e65b16876a89a..762862dc00305 100644 --- a/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.h +++ b/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.h @@ -1,4 +1,4 @@ -//===--- UseConcisePreprocessorDirectivesCheck.h - clang-tidy ---*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp index 511256332cee9..8052e04c99f43 100644 --- a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseStdMinMaxCheck.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.h b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.h index b8d8b8c4fe894..573394361cbda 100644 --- a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.h +++ b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.h @@ -1,4 +1,4 @@ -//===--- UseStdMinMaxCheck.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/rename_check.py b/clang-tools-extra/clang-tidy/rename_check.py index 5f3295b23ba72..b864bff814485 100755 --- a/clang-tools-extra/clang-tidy/rename_check.py +++ b/clang-tools-extra/clang-tidy/rename_check.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# ===- rename_check.py - clang-tidy check renamer ------------*- python -*--===# +# ===-----------------------------------------------------------------------===# # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. 
@@ -51,30 +51,6 @@ def replaceInFile(fileName: str, sFrom: str, sTo: str) -> None: f.write(txt) -def generateCommentLineHeader(filename: str) -> str: - return "".join( - [ - "//===--- ", - os.path.basename(filename), - " - clang-tidy ", - "-" * max(0, 42 - len(os.path.basename(filename))), - "*- C++ -*-===//", - ] - ) - - -def generateCommentLineSource(filename: str) -> str: - return "".join( - [ - "//===--- ", - os.path.basename(filename), - " - clang-tidy", - "-" * max(0, 52 - len(os.path.basename(filename))), - "-===//", - ] - ) - - def fileRename(fileName: str, sFrom: str, sTo: str) -> str: if sFrom not in fileName or sFrom == sTo: return fileName @@ -337,16 +313,6 @@ def main() -> None: ) filename = fileRename(filename, args.old_check_name, args.new_check_name) filename = fileRename(filename, check_name_camel, new_check_name_camel) - replaceInFile( - filename, - generateCommentLineHeader(originalName), - generateCommentLineHeader(filename), - ) - replaceInFile( - filename, - generateCommentLineSource(originalName), - generateCommentLineSource(filename), - ) for header_guard in header_guard_variants: replaceInFile(filename, header_guard, header_guard_new) diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp index bef3b938b5afd..35ea1b5714b84 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp @@ -1,4 +1,4 @@ -//===--- tools/extra/clang-tidy/ClangTidyMain.cpp - Clang tidy tool -------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h index f3862f93d833b..35f75396828dd 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h @@ -1,4 +1,4 @@ -//===--- tools/extra/clang-tidy/ClangTidyMain.h - Clang tidy tool -------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyToolMain.cpp b/clang-tools-extra/clang-tidy/tool/ClangTidyToolMain.cpp index eb7fde7b8e07b..ea2897dfe1390 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyToolMain.cpp +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyToolMain.cpp @@ -1,4 +1,4 @@ -//===--- tools/extra/clang-tidy/ClangTidyToolMain.cpp - Clang tidy tool ---===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py b/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py index b4b4648e765cf..5daa93dca2a99 100755 --- a/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py +++ b/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# ===- clang-tidy-diff.py - ClangTidy Diff Checker -----------*- python -*--===# +# ===-----------------------------------------------------------------------===# # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index a722e20a81c68..eadf7194ab94f 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# ===- run-clang-tidy.py - Parallel clang-tidy runner --------*- python -*--===# +# ===-----------------------------------------------------------------------===# # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp b/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp index 0cdc7d08abc99..d5deb99a8442d 100644 --- a/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp @@ -1,4 +1,4 @@ -//===---------- ASTUtils.cpp - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/ASTUtils.h b/clang-tools-extra/clang-tidy/utils/ASTUtils.h index 6c3e54facd020..c2127f0746986 100644 --- a/clang-tools-extra/clang-tidy/utils/ASTUtils.h +++ b/clang-tools-extra/clang-tidy/utils/ASTUtils.h @@ -1,4 +1,4 @@ -//===---------- ASTUtils.h - clang-tidy -----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/Aliasing.cpp b/clang-tools-extra/clang-tidy/utils/Aliasing.cpp index cbe4873b5c022..a22d2358bc560 100644 --- a/clang-tools-extra/clang-tidy/utils/Aliasing.cpp +++ b/clang-tools-extra/clang-tidy/utils/Aliasing.cpp @@ -1,4 +1,4 @@ -//===------------- Aliasing.cpp - clang-tidy ------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/Aliasing.h b/clang-tools-extra/clang-tidy/utils/Aliasing.h index 6c0763b766805..2384534609366 100644 --- a/clang-tools-extra/clang-tidy/utils/Aliasing.h +++ b/clang-tools-extra/clang-tidy/utils/Aliasing.h @@ -1,4 +1,4 @@ -//===------------- Aliasing.h - clang-tidy --------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp index 2a3b7bed08c1e..14770c49c2e25 100644 --- a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp +++ b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp @@ -1,4 +1,4 @@ -//===--- BracesAroundStatement.cpp - clang-tidy -------- ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h index cb1c06c7aa1a1..699d75435db7b 100644 --- a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h +++ b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h @@ -1,4 +1,4 @@ -//===--- BracesAroundStatement.h - clang-tidy ------- -----------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp index 106feb7fb4172..57453ad089a2c 100644 --- a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp @@ -1,4 +1,4 @@ -//===--- DeclRefExprUtils.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.h b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.h index 8361b9d89ed26..794adc04dc478 100644 --- a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.h +++ b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.h @@ -1,4 +1,4 @@ -//===--- DeclRefExprUtils.h - clang-tidy-------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp index d43716e901e84..044f89be61342 100644 --- a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp +++ b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp @@ -1,4 +1,4 @@ -//===--- DesignatedInitializers.cpp - clang-tidy --------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h index a6cb2963faf72..910960137ddbb 100644 --- a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h +++ b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h @@ -1,4 +1,4 @@ -//===--- DesignatedInitializers.h - clang-tidy ------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp index 3fe8412e69675..bdde7249d2796 100644 --- a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp +++ b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp @@ -1,4 +1,4 @@ -//===--- ExceptionAnalyzer.cpp - clang-tidy -------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h index bd466c99c04bb..1ab6dcb2eb255 100644 --- a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h +++ b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h @@ -1,4 +1,4 @@ -//===--- ExceptionAnalyzer.h - clang-tidy -----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.cpp b/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.cpp index 4693c656a6602..b1d6b195f9470 100644 --- a/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.cpp +++ b/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.cpp @@ -1,4 +1,4 @@ -//===--- ExceptionSpecAnalyzer.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h b/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h index ddfb796d9c546..3fd6fe170c734 100644 --- a/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h +++ b/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h @@ -1,4 +1,4 @@ -//===--- ExceptionSpecAnalyzer.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp b/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp index fcbb5ecc7152d..393f935fc31e4 100644 --- a/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp +++ b/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp @@ -1,4 +1,4 @@ -//===---------- ExprSequence.cpp - clang-tidy -----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/ExprSequence.h b/clang-tools-extra/clang-tidy/utils/ExprSequence.h index 6531e1876c4fe..9ef94e0e3bcde 100644 --- a/clang-tools-extra/clang-tidy/utils/ExprSequence.h +++ b/clang-tools-extra/clang-tidy/utils/ExprSequence.h @@ -1,4 +1,4 @@ -//===------------- ExprSequence.h - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp index b40bba6d1f3ab..41d5131599ce6 100644 --- a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp @@ -1,4 +1,4 @@ -//===--- FileExtensionsUtils.cpp - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h index e23f6b79c6af4..dfab141e32417 100644 --- a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h +++ b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h @@ -1,4 +1,4 @@ -//===--- FileExtensionsUtils.h - clang-tidy --------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp b/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp index a15589f9721c7..086c7f3a15d45 100644 --- a/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp @@ -1,4 +1,4 @@ -//===--- FixItHintUtils.cpp - clang-tidy-----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/FixItHintUtils.h b/clang-tools-extra/clang-tidy/utils/FixItHintUtils.h index e690dbaefe642..74608d4ff268f 100644 --- a/clang-tools-extra/clang-tidy/utils/FixItHintUtils.h +++ b/clang-tools-extra/clang-tidy/utils/FixItHintUtils.h @@ -1,4 +1,4 @@ -//===--- FixItHintUtils.h - clang-tidy---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp index 0d0834dc38fc6..f4945b2113c69 100644 --- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp +++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp @@ -1,4 +1,4 @@ -//===--- FormatStringConverter.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.h b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.h index 15d1f597fe440..209741fac276c 100644 --- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.h +++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.h @@ -1,4 +1,4 @@ -//===--- FormatStringConverter.h - clang-tidy--------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp b/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp index 53ce28e019f75..e1d13876d64a9 100644 --- a/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp +++ b/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp @@ -1,4 +1,4 @@ -//===--- HeaderGuard.cpp - clang-tidy -------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/HeaderGuard.h b/clang-tools-extra/clang-tidy/utils/HeaderGuard.h index eff75d6ff26a2..ce8acb07783b3 100644 --- a/clang-tools-extra/clang-tidy/utils/HeaderGuard.h +++ b/clang-tools-extra/clang-tidy/utils/HeaderGuard.h @@ -1,4 +1,4 @@ -//===--- HeaderGuard.h - clang-tidy -----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp index b53016f331b79..0b67cba6ffb0a 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp @@ -1,4 +1,4 @@ -//===-------- IncludeInserter.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/IncludeInserter.h b/clang-tools-extra/clang-tidy/utils/IncludeInserter.h index 5308f76bd2151..f6ca7d63632de 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeInserter.h +++ b/clang-tools-extra/clang-tidy/utils/IncludeInserter.h @@ -1,4 +1,4 @@ -//===---------- IncludeInserter.h - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp index db1ea1bb514f8..6a71a11c18754 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp @@ -1,4 +1,4 @@ -//===---------- IncludeSorter.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h index 782fa6721bc03..ce752c45f2a77 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h @@ -1,4 +1,4 @@ -//===------------ IncludeSorter.h - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp index c14d341caf779..7222f64804f63 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -1,4 +1,4 @@ -//===--- LexerUtils.cpp - clang-tidy---------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h index afd63885e388c..b76a37874b514 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h @@ -1,4 +1,4 @@ -//===--- LexerUtils.h - clang-tidy-------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/Matchers.cpp b/clang-tools-extra/clang-tidy/utils/Matchers.cpp index bd7b03eb39ad7..4382745c8bdc5 100644 --- a/clang-tools-extra/clang-tidy/utils/Matchers.cpp +++ b/clang-tools-extra/clang-tidy/utils/Matchers.cpp @@ -1,4 +1,4 @@ -//===---------- Matchers.cpp - clang-tidy ---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/Matchers.h b/clang-tools-extra/clang-tidy/utils/Matchers.h index a7683024d69c4..6caa35de3c98f 100644 --- a/clang-tools-extra/clang-tidy/utils/Matchers.h +++ b/clang-tools-extra/clang-tidy/utils/Matchers.h @@ -1,4 +1,4 @@ -//===--- Matchers.h - clang-tidy-------------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp index f5949bab8f243..3af7f8dcf2ee5 100644 --- a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp +++ b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp @@ -1,4 +1,4 @@ -//===---------- NamespaceAliaser.cpp - clang-tidy -------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h index df4d4b95ba421..497b67e82a900 100644 --- a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h +++ b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h @@ -1,4 +1,4 @@ -//===---------- NamespaceAliaser.h - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/OptionsUtils.cpp b/clang-tools-extra/clang-tidy/utils/OptionsUtils.cpp index 1866ea3f5b58a..2f784360ac7ec 100644 --- a/clang-tools-extra/clang-tidy/utils/OptionsUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/OptionsUtils.cpp @@ -1,4 +1,4 @@ -//===-- OptionsUtils.cpp - clang-tidy -------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/OptionsUtils.h b/clang-tools-extra/clang-tidy/utils/OptionsUtils.h index f15c07fe47fad..aec24ab0a84b3 100644 --- a/clang-tools-extra/clang-tidy/utils/OptionsUtils.h +++ b/clang-tools-extra/clang-tidy/utils/OptionsUtils.h @@ -1,4 +1,4 @@ -//===--- DanglingHandleCheck.h - clang-tidy----------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index 24d346bdfaa53..70f6092a5e4bc 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -1,4 +1,4 @@ -//===--- RenamerClangTidyCheck.cpp - clang-tidy ---------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h index 3d5721b789ac2..68b3040895417 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h @@ -1,4 +1,4 @@ -//===--- RenamerClangTidyCheck.h - clang-tidy -------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp index 7d84a4a9331b1..87602d1187d59 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp @@ -1,4 +1,4 @@ -//===---------- TransformerClangTidyCheck.cpp - clang-tidy ----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h index 3f5c4cac52b7b..ad20fbd475759 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h @@ -1,4 +1,4 @@ -//===---------- TransformerClangTidyCheck.h - clang-tidy ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp b/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp index f944306171135..d4e079f1cf4c2 100644 --- a/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp +++ b/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp @@ -1,4 +1,4 @@ -//===--- TypeTraits.cpp - clang-tidy---------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/TypeTraits.h b/clang-tools-extra/clang-tidy/utils/TypeTraits.h index eb4dd0ff3a510..98a4a99bf8d4d 100644 --- a/clang-tools-extra/clang-tidy/utils/TypeTraits.h +++ b/clang-tools-extra/clang-tidy/utils/TypeTraits.h @@ -1,4 +1,4 @@ -//===--- TypeTraits.h - clang-tidy-------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp index 25601f9a01a48..cb1495163a2f9 100644 --- a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h index a5ba6802dd89e..b85a157ba2873 100644 --- a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h +++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h @@ -1,4 +1,4 @@ -//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp b/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp index 3a2c16ff05dae..e4c71aa60a7a2 100644 --- a/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp +++ b/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp @@ -1,4 +1,4 @@ -//===---------- UsingInserter.cpp - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/utils/UsingInserter.h b/clang-tools-extra/clang-tidy/utils/UsingInserter.h index 7ff1f0b9792e1..23c317581c191 100644 --- a/clang-tools-extra/clang-tidy/utils/UsingInserter.h +++ b/clang-tools-extra/clang-tidy/utils/UsingInserter.h @@ -1,4 +1,4 @@ -//===---------- UsingInserter.h - clang-tidy ----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.cpp b/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.cpp index bb2c71913193b..96a36cba827e6 100644 --- a/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.cpp +++ b/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.cpp @@ -1,4 +1,4 @@ -//===--- TemporaryObjectsCheck.cpp - clang-tidy----------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.h b/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.h index b2d5ab61fb0dc..5ecf9c4172d18 100644 --- a/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.h +++ b/clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.h @@ -1,4 +1,4 @@ -//===--- TemporaryObjectsCheck.h - clang-tidy------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clang-tidy/zircon/ZirconTidyModule.cpp b/clang-tools-extra/clang-tidy/zircon/ZirconTidyModule.cpp index 0eb5683a94e41..86d7ce4e04e7b 100644 --- a/clang-tools-extra/clang-tidy/zircon/ZirconTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/zircon/ZirconTidyModule.cpp @@ -1,4 +1,4 @@ -//===--- ZirconTidyModule.cpp - clang-tidy---------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py index 3eaba0e9dff3a..26f8cbaeb9f31 100755 --- a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py +++ b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# ===- check_clang_tidy.py - ClangTidy Test Helper ------------*- python -*--===# +# ===-----------------------------------------------------------------------===# # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. 
From 7dd2f1cc10902e632c0c78c75a30432a53eb59dc Mon Sep 17 00:00:00 2001 From: Gedare Bloom Date: Sun, 14 Sep 2025 14:47:06 -0600 Subject: [PATCH 271/734] [clang-format] Add IndentPPDirectives Leave option (#139750) Allow an option to leave preprocessor directive indenting as-is. This simplifies handling mixed styles of CPP directive indentation. Fixes #38511 --- clang/docs/ClangFormatStyleOptions.rst | 15 +++++ clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Format/Format.h | 14 +++- clang/lib/Format/ContinuationIndenter.cpp | 24 +++---- clang/lib/Format/Format.cpp | 1 + clang/lib/Format/TokenAnnotator.cpp | 2 +- clang/lib/Format/UnwrappedLineFormatter.cpp | 16 +++-- clang/lib/Format/UnwrappedLineParser.cpp | 10 +-- clang/lib/Format/UnwrappedLineParser.h | 7 ++ clang/unittests/Format/FormatTest.cpp | 71 +++++++++++++++++++++ 10 files changed, 136 insertions(+), 25 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 6be4d512bda6a..9413b9a348b76 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -4426,6 +4426,21 @@ the configuration (without a prefix: ``Auto``). #endif #endif + * ``PPDIS_Leave`` (in configuration: ``Leave``) + Leaves indentation of directives as-is. + + .. note:: + + Ignores ``PPIndentWidth``. + + .. code-block:: c++ + + #if FOO + #if BAR + #include + #endif + #endif + .. _IndentRequiresClause: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6eb2a52e80ba9..873d63f56480c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -475,6 +475,7 @@ clang-format - Add ``SpaceInEmptyBraces`` option and set it to ``Always`` for WebKit style. - Add ``NumericLiteralCase`` option for enforcing character case in numeric literals. +- Add ``Leave`` suboption to ``IndentPPDirectives``. 
libclang -------- diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 03cff5f8cfb66..342fefcfc408c 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2976,7 +2976,19 @@ struct FormatStyle { /// #endif /// #endif /// \endcode - PPDIS_BeforeHash + PPDIS_BeforeHash, + /// Leaves indentation of directives as-is. + /// \note + /// Ignores ``PPIndentWidth``. + /// \endnote + /// \code + /// #if FOO + /// #if BAR + /// #include + /// #endif + /// #endif + /// \endcode + PPDIS_Leave }; /// The preprocessor directive indenting style to use. diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 888d0faf80931..9413c13a4137e 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -780,19 +780,21 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // Indent preprocessor directives after the hash if required. int PPColumnCorrection = 0; - if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && - Previous.is(tok::hash) && State.FirstIndent > 0 && - &Previous == State.Line->First && + if (&Previous == State.Line->First && Previous.is(tok::hash) && (State.Line->Type == LT_PreprocessorDirective || State.Line->Type == LT_ImportStatement)) { - Spaces += State.FirstIndent; - - // For preprocessor indent with tabs, State.Column will be 1 because of the - // hash. This causes second-level indents onward to have an extra space - // after the tabs. We avoid this misalignment by subtracting 1 from the - // column value passed to replaceWhitespace(). - if (Style.UseTab != FormatStyle::UT_Never) - PPColumnCorrection = -1; + if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) { + Spaces += State.FirstIndent; + + // For preprocessor indent with tabs, State.Column will be 1 because of + // the hash. 
This causes second-level indents onward to have an extra + // space after the tabs. We avoid this misalignment by subtracting 1 from + // the column value passed to replaceWhitespace(). + if (Style.UseTab != FormatStyle::UT_Never) + PPColumnCorrection = -1; + } else if (Style.IndentPPDirectives == FormatStyle::PPDIS_Leave) { + Spaces += Current.OriginalColumn - Previous.OriginalColumn - 1; + } } if (!DryRun) { diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index f095d2c18cfcf..1776e373cf8a8 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -535,6 +535,7 @@ struct ScalarEnumerationTraits { IO.enumCase(Value, "None", FormatStyle::PPDIS_None); IO.enumCase(Value, "AfterHash", FormatStyle::PPDIS_AfterHash); IO.enumCase(Value, "BeforeHash", FormatStyle::PPDIS_BeforeHash); + IO.enumCase(Value, "Leave", FormatStyle::PPDIS_Leave); } }; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index bbb7ef2c337d6..d97f56751ea69 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3627,7 +3627,7 @@ void TokenAnnotator::setCommentLineLevels( // Align comments for preprocessor lines with the # in column 0 if // preprocessor lines are not indented. Otherwise, align with the next // line. - Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && + Line->Level = Style.IndentPPDirectives < FormatStyle::PPDIS_BeforeHash && PPDirectiveOrImportStmt ? 0 : NextNonCommentLine->Level; diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 2a7bfd1a7dc5b..ac9d147defc13 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -62,10 +62,16 @@ class LevelIndentTracker { // having the right size in adjustToUnmodifiedline. 
if (Line.Level >= IndentForLevel.size()) IndentForLevel.resize(Line.Level + 1, -1); - if (Style.IndentPPDirectives != FormatStyle::PPDIS_None && - (Line.InPPDirective || - (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && - Line.Type == LT_CommentAbovePPDirective))) { + if (Style.IndentPPDirectives == FormatStyle::PPDIS_Leave && + (Line.InPPDirective || Line.Type == LT_CommentAbovePPDirective)) { + Indent = Line.InMacroBody + ? (Line.Level - Line.PPLevel) * Style.IndentWidth + + AdditionalIndent + : Line.First->OriginalColumn; + } else if (Style.IndentPPDirectives != FormatStyle::PPDIS_None && + (Line.InPPDirective || + (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && + Line.Type == LT_CommentAbovePPDirective))) { unsigned PPIndentWidth = (Style.PPIndentWidth >= 0) ? Style.PPIndentWidth : Style.IndentWidth; Indent = Line.InMacroBody @@ -1656,7 +1662,7 @@ void UnwrappedLineFormatter::formatFirstToken( // Preprocessor directives get indented before the hash only if specified. In // Javascript import statements are indented like normal statements. if (!Style.isJavaScript() && - Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && + Style.IndentPPDirectives < FormatStyle::PPDIS_BeforeHash && (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement)) { Indent = 0; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index f4bbfcf8461bc..2c9766c9b7bc0 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -162,17 +162,13 @@ UnwrappedLineParser::UnwrappedLineParser( LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), - IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None - ? 
IG_Rejected - : IG_Inited), + IncludeGuard(getIncludeGuardState(Style.IndentPPDirectives)), IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; - IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None - ? IG_Rejected - : IG_Inited; + IncludeGuard = getIncludeGuardState(Style.IndentPPDirectives); IncludeGuardToken = nullptr; Line.reset(new UnwrappedLine); CommentsBeforeNextToken.clear(); @@ -1140,7 +1136,7 @@ void UnwrappedLineParser::parsePPEndIf() { // If the #endif of a potential include guard is the last thing in the file, // then we found an include guard. if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && - Style.IndentPPDirectives != FormatStyle::PPDIS_None) { + getIncludeGuardState(Style.IndentPPDirectives) == IG_Inited) { IncludeGuard = IG_Found; } } diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 8e29680ff244b..8b8ad84896f1a 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -397,6 +397,13 @@ class UnwrappedLineParser { // Current state of include guard search. IncludeGuardState IncludeGuard; + IncludeGuardState + getIncludeGuardState(FormatStyle::PPDirectiveIndentStyle Style) const { + return Style == FormatStyle::PPDIS_None || Style == FormatStyle::PPDIS_Leave + ? IG_Rejected + : IG_Inited; + } + // Points to the #ifndef condition for a potential include guard. Null unless // IncludeGuardState == IG_IfNdefed. 
FormatToken *IncludeGuardToken; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 4e9d31895998f..d9db06667d802 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -5564,6 +5564,63 @@ TEST_F(FormatTest, IndentsPPDirectiveWithPPIndentWidth) { " }", style); + style.IndentPPDirectives = FormatStyle::PPDIS_Leave; + style.IndentWidth = 4; + verifyNoChange("#ifndef foo\n" + "#define foo\n" + "if (emacs) {\n" + "#ifdef is\n" + "#define lit \\\n" + " if (af) { \\\n" + " return duh(); \\\n" + " }\n" + "#endif\n" + "}\n" + "#endif", + style); + verifyNoChange("#ifndef foo\n" + " #define foo\n" + "if (emacs) {\n" + " #ifdef is\n" + "#define lit \\\n" + " if (af) { \\\n" + " return duh(); \\\n" + " }\n" + " #endif\n" + "}\n" + "#endif", + style); + verifyNoChange(" #ifndef foo\n" + "# define foo\n" + "if (emacs) {\n" + "#ifdef is\n" + " # define lit \\\n" + " if (af) { \\\n" + " return duh(); \\\n" + " }\n" + "#endif\n" + "}\n" + " #endif", + style); + verifyNoChange("#ifdef foo\n" + "#else\n" + "/* This is a comment */\n" + "#ifdef BAR\n" + "#endif\n" + "#endif", + style); + + style.IndentWidth = 1; + style.PPIndentWidth = 4; + verifyNoChange("# if 1\n" + " #define X \\\n" + " { \\\n" + " x; \\\n" + " x; \\\n" + " }\n" + "# endif", + style); + style.IndentWidth = 4; style.PPIndentWidth = 1; style.IndentPPDirectives = FormatStyle::PPDIS_AfterHash; @@ -25597,6 +25654,20 @@ TEST_F(FormatTest, SkipMacroDefinitionBody) { "a", Style); + Style.IndentPPDirectives = FormatStyle::PPDIS_Leave; + verifyNoChange("#if A\n" + "#define A a\n" + "#endif", + Style); + verifyNoChange("#if A\n" + " #define A a\n" + "#endif", + Style); + verifyNoChange("#if A\n" + "# define A a\n" + "#endif", + Style); + // Adjust indendations but don't change the definition. 
Style.IndentPPDirectives = FormatStyle::PPDIS_None; verifyNoChange("#if A\n" From 885546c83c1f9417d0e39679e39be3d055604db2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 14:04:06 -0700 Subject: [PATCH 272/734] [ADT] Adjust parameter names for DenseMap constructors (NFC) (#158501) This patch ensures that both DenseMap and SmallDenseMap have: explicit DenseMap(unsigned NumElementsToReserve = 0) { init(NumElementsToReserve); } for consistency and clarity (modulo the class name, of course). --- llvm/include/llvm/ADT/DenseMap.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index b478ce21d79e6..e13a2cb09a412 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -710,9 +710,11 @@ class DenseMap : public DenseMapBase, unsigned NumBuckets; public: - /// Create a DenseMap with an optional \p InitialReserve that guarantee that - /// this number of elements can be inserted in the map without grow() - explicit DenseMap(unsigned InitialReserve = 0) { init(InitialReserve); } + /// Create a DenseMap with an optional \p NumElementsToReserve to guarantee + /// that this number of elements can be inserted in the map without grow(). 
+ explicit DenseMap(unsigned NumElementsToReserve = 0) { + init(NumElementsToReserve); + } DenseMap(const DenseMap &other) : BaseT() { init(0); @@ -887,8 +889,8 @@ class SmallDenseMap AlignedCharArrayUnion storage; public: - explicit SmallDenseMap(unsigned NumElementsToReservre = 0) { - init(NumElementsToReservre); + explicit SmallDenseMap(unsigned NumElementsToReserve = 0) { + init(NumElementsToReserve); } SmallDenseMap(const SmallDenseMap &other) : BaseT() { From 0e36aa1640a5c0d00138c61de0f99f785cff1e2a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 14:04:13 -0700 Subject: [PATCH 273/734] [ADT, Support] Use std::bool_constant (NFC) (#158503) This patch replaces, std::integral_constant with std::bool_constant for brevity. Note that std::bool_constant was introduced as part of C++17. There are cases where we could strip away std::bool_constant altogether: std::bool_constant> but I'm not doing that in this patch to avoid doing multiple things in one patch. --- llvm/include/llvm/ADT/Hashing.h | 19 ++++++------ llvm/include/llvm/ADT/ilist_node_options.h | 8 ++--- llvm/include/llvm/Support/CFGDiff.h | 7 ++--- llvm/include/llvm/Support/FormatProviders.h | 29 ++++++++----------- .../llvm/Support/FormatVariadicDetails.h | 23 +++++++-------- llvm/include/llvm/Support/HashBuilder.h | 3 +- llvm/include/llvm/Support/YAMLTraits.h | 15 +++++----- 7 files changed, 47 insertions(+), 57 deletions(-) diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index ec22fe3a28cf9..41a730e24a6b1 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -333,20 +333,21 @@ inline uint64_t get_execution_seed() { // for equality. For all the platforms we care about, this holds for integers // and pointers, but there are platforms where it doesn't and we would like to // support user-defined types which happen to satisfy this property. 
-template struct is_hashable_data - : std::integral_constant::value || - std::is_pointer::value) && - 64 % sizeof(T) == 0)> {}; +template +struct is_hashable_data : std::bool_constant<((is_integral_or_enum::value || + std::is_pointer::value) && + 64 % sizeof(T) == 0)> {}; // Special case std::pair to detect when both types are viable and when there // is no alignment-derived padding in the pair. This is a bit of a lie because // std::pair isn't truly POD, but it's close enough in all reasonable // implementations for our use case of hashing the underlying data. -template struct is_hashable_data > - : std::integral_constant::value && - is_hashable_data::value && - (sizeof(T) + sizeof(U)) == - sizeof(std::pair))> {}; +template +struct is_hashable_data> + : std::bool_constant<(is_hashable_data::value && + is_hashable_data::value && + (sizeof(T) + sizeof(U)) == sizeof(std::pair))> { +}; /// Helper to get the hashable data representation for a type. template auto get_hashable_data(const T &value) { diff --git a/llvm/include/llvm/ADT/ilist_node_options.h b/llvm/include/llvm/ADT/ilist_node_options.h index d26e79b925ad1..143195aa9c647 100644 --- a/llvm/include/llvm/ADT/ilist_node_options.h +++ b/llvm/include/llvm/ADT/ilist_node_options.h @@ -82,7 +82,7 @@ template struct extract_sentinel_tracking; template struct extract_sentinel_tracking< ilist_sentinel_tracking, Options...> - : std::integral_constant, is_explicit {}; + : std::bool_constant, is_explicit {}; template struct extract_sentinel_tracking : extract_sentinel_tracking {}; @@ -119,7 +119,7 @@ template struct is_valid_option> : std::true_type {}; template struct extract_iterator_bits; template struct extract_iterator_bits, Options...> - : std::integral_constant {}; + : std::bool_constant {}; template struct extract_iterator_bits : extract_iterator_bits {}; @@ -149,8 +149,8 @@ template struct check_options; template <> struct check_options<> : std::true_type {}; template struct check_options - : 
std::integral_constant::value && - check_options::value> {}; + : std::bool_constant::value && + check_options::value> {}; /// Traits for options for \a ilist_node. /// diff --git a/llvm/include/llvm/Support/CFGDiff.h b/llvm/include/llvm/Support/CFGDiff.h index 11bb9c0fb8f4d..41004d755a124 100644 --- a/llvm/include/llvm/Support/CFGDiff.h +++ b/llvm/include/llvm/Support/CFGDiff.h @@ -34,18 +34,17 @@ namespace llvm { namespace detail { template -auto reverse_if_helper(Range &&R, std::integral_constant) { +auto reverse_if_helper(Range &&R, std::bool_constant) { return std::forward(R); } template -auto reverse_if_helper(Range &&R, std::integral_constant) { +auto reverse_if_helper(Range &&R, std::bool_constant) { return llvm::reverse(std::forward(R)); } template auto reverse_if(Range &&R) { - return reverse_if_helper(std::forward(R), - std::integral_constant{}); + return reverse_if_helper(std::forward(R), std::bool_constant{}); } } // namespace detail diff --git a/llvm/include/llvm/Support/FormatProviders.h b/llvm/include/llvm/Support/FormatProviders.h index b7d2e2e45f71f..3e0800e1efe6c 100644 --- a/llvm/include/llvm/Support/FormatProviders.h +++ b/llvm/include/llvm/Support/FormatProviders.h @@ -29,35 +29,31 @@ namespace support { namespace detail { template struct use_integral_formatter - : public std::integral_constant< - bool, is_one_of::value> {}; + : public std::bool_constant< + is_one_of::value> {}; template -struct use_char_formatter - : public std::integral_constant> {}; +struct use_char_formatter : public std::bool_constant> { +}; template struct is_cstring - : public std::integral_constant::value> { -}; + : public std::bool_constant::value> {}; template struct use_string_formatter - : public std::integral_constant> { -}; + : public std::bool_constant> {}; template struct use_pointer_formatter - : public std::integral_constant && - !is_cstring::value> {}; + : public std::bool_constant && !is_cstring::value> { +}; template struct use_double_formatter - : public 
std::integral_constant> {}; + : public std::bool_constant> {}; class HelperFunctions { protected: @@ -330,8 +326,7 @@ using IterValue = typename std::iterator_traits::value_type; template struct range_item_has_provider - : public std::integral_constant< - bool, + : public std::bool_constant< !support::detail::uses_missing_provider>::value> {}; } // namespace detail } // namespace support diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h index b85a4f6065195..aaad226666aa1 100644 --- a/llvm/include/llvm/Support/FormatVariadicDetails.h +++ b/llvm/include/llvm/Support/FormatVariadicDetails.h @@ -96,26 +96,24 @@ template class has_StreamOperator { // based format() invocation. template struct uses_format_member - : public std::integral_constant< - bool, std::is_base_of_v>> { -}; + : public std::bool_constant< + std::is_base_of_v>> {}; // Simple template that decides whether a type T should use the format_provider // based format() invocation. The member function takes priority, so this test // will only be true if there is not ALSO a format member. template struct uses_format_provider - : public std::integral_constant< - bool, !uses_format_member::value && has_FormatProvider::value> { -}; + : public std::bool_constant::value && + has_FormatProvider::value> {}; // Simple template that decides whether a type T should use the operator<< // based format() invocation. This takes last priority. template struct uses_stream_operator - : public std::integral_constant::value && - !uses_format_provider::value && - has_StreamOperator::value> {}; + : public std::bool_constant::value && + !uses_format_provider::value && + has_StreamOperator::value> {}; // Simple template that decides whether a type T has neither a member-function // nor format_provider based implementation that it can use. Mostly used so @@ -123,10 +121,9 @@ struct uses_stream_operator // implementation can be located. 
template struct uses_missing_provider - : public std::integral_constant::value && - !uses_format_provider::value && - !uses_stream_operator::value> { -}; + : public std::bool_constant::value && + !uses_format_provider::value && + !uses_stream_operator::value> {}; template std::enable_if_t::value, T> diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h index 17fbc3f96ed04..ae266d3f19a1a 100644 --- a/llvm/include/llvm/Support/HashBuilder.h +++ b/llvm/include/llvm/Support/HashBuilder.h @@ -32,8 +32,7 @@ namespace hashbuilder_detail { /// Trait to indicate whether a type's bits can be hashed directly (after /// endianness correction). template -struct IsHashableData - : std::integral_constant::value> {}; +struct IsHashableData : std::bool_constant::value> {}; } // namespace hashbuilder_detail diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index 27af2d60c837f..cce36a253777b 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -459,8 +459,7 @@ template struct has_FlowTraits { // Test if SequenceTraits is defined on type T template struct has_SequenceTraits - : public std::integral_constant::value> { -}; + : public std::bool_constant::value> {}; // Test if DocumentListTraits is defined on type T template struct has_DocumentListTraits { @@ -683,15 +682,15 @@ struct missingTraits template struct validatedMappingTraits - : public std::integral_constant< - bool, has_MappingTraits::value && - has_MappingValidateTraits::value> {}; + : public std::bool_constant::value && + has_MappingValidateTraits::value> { +}; template struct unvalidatedMappingTraits - : public std::integral_constant< - bool, has_MappingTraits::value && - !has_MappingValidateTraits::value> {}; + : public std::bool_constant::value && + !has_MappingValidateTraits::value> { +}; // Base class for Input and Output. 
class LLVM_ABI IO { From 7d949ee04f9863093dc3605feeca931318b0a0cd Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 14:04:20 -0700 Subject: [PATCH 274/734] [Support] Use llvm::is_detected (NFC) (#158504) This patch uses llvm::is_detected to replace the old SFINAE-based approach. --- llvm/include/llvm/Support/FormatVariadicDetails.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h index aaad226666aa1..fa11d56fc1ada 100644 --- a/llvm/include/llvm/Support/FormatVariadicDetails.h +++ b/llvm/include/llvm/Support/FormatVariadicDetails.h @@ -66,13 +66,10 @@ template class has_FormatProvider { typedef void (*Signature_format)(const Decayed &, llvm::raw_ostream &, StringRef); - template - static char test(SameType *); - - template static double test(...); + template using check = SameType; - static bool const value = - (sizeof(test>(nullptr)) == 1); + static constexpr bool value = + llvm::is_detected>::value; }; // Test if raw_ostream& << T -> raw_ostream& is findable via ADL. 
From 30f4781eef567b99214e02137a57c7ac91279a48 Mon Sep 17 00:00:00 2001 From: owenca Date: Sun, 14 Sep 2025 14:20:59 -0700 Subject: [PATCH 275/734] [clang-format] Handle C digit separators (#158418) Fixes #158413 --- clang/lib/Format/Format.cpp | 1 + clang/unittests/Format/TokenAnnotatorTest.cpp | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 1776e373cf8a8..68e9618432035 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -4117,6 +4117,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { switch (Style.Language) { case FormatStyle::LK_C: LangOpts.C11 = 1; + LangOpts.C23 = 1; break; case FormatStyle::LK_Cpp: case FormatStyle::LK_ObjC: diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 141b0001cb52d..f6435f13f0791 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -4105,6 +4105,13 @@ TEST_F(TokenAnnotatorTest, UTF8StringLiteral) { EXPECT_TOKEN(Tokens[1], tok::utf8_string_literal, TT_Unknown); } +TEST_F(TokenAnnotatorTest, C23DigitSeparator) { + auto Tokens = annotate("return 1'000;", getLLVMStyle(FormatStyle::LK_C)); + ASSERT_EQ(Tokens.size(), 4u) << Tokens; + EXPECT_EQ(Tokens[1]->TokenText, "1'000"); + EXPECT_TOKEN(Tokens[2], tok::semi, TT_Unknown); +} + TEST_F(TokenAnnotatorTest, IdentifierPackage) { auto Tokens = annotate("auto package;"); ASSERT_EQ(Tokens.size(), 4u) << Tokens; From fb60d0337c15640df95872d90240fde42fb80ea1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 14 Sep 2025 22:24:56 +0100 Subject: [PATCH 276/734] [VPlan] Return non-option cost from getCostForRecipeWithOpcode (NFC). getCostForRecipeWithOpcode must only be called with supported opcodes. Directly return the cost, and add llvm_unreachable to catch unhandled cases. 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 5 ++--- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 997a45b1470ef..8afc30ede3f47 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -918,9 +918,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { void execute(VPTransformState &State) override = 0; /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx. - std::optional - getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, - VPCostContext &Ctx) const; + InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, + VPCostContext &Ctx) const; }; /// Helper to access the operand that contains the unroll part for this recipe diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b72088bf1431e..95e3196478176 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -978,7 +978,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } } -std::optional VPRecipeWithIRFlags::getCostForRecipeWithOpcode( +InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode( unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const { Type *ScalarTy = Ctx.Types.inferScalarType(this); Type *ResultTy = VF.isVector() ? 
toVectorTy(ScalarTy, VF) : ScalarTy; @@ -1044,7 +1044,7 @@ std::optional VPRecipeWithIRFlags::getCostForRecipeWithOpcode( {TTI::OK_AnyValue, TTI::OP_None}, CtxI); } } - return std::nullopt; + llvm_unreachable("called for unsupported opcode"); } InstructionCost VPInstruction::computeCost(ElementCount VF, @@ -1059,7 +1059,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, assert(!doesGeneratePerAllLanes() && "Should only generate a vector value or single scalar, not scalars " "for all lanes."); - return *getCostForRecipeWithOpcode( + return getCostForRecipeWithOpcode( getOpcode(), vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF, Ctx); } @@ -2206,7 +2206,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, case Instruction::ExtractValue: case Instruction::ICmp: case Instruction::FCmp: - return *getCostForRecipeWithOpcode(getOpcode(), VF, Ctx); + return getCostForRecipeWithOpcode(getOpcode(), VF, Ctx); default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -3151,15 +3151,15 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: - return *getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), - Ctx) * + return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), + Ctx) * (isSingleScalar() ? 
1 : VF.getFixedValue()); case Instruction::SDiv: case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: { - InstructionCost ScalarCost = *getCostForRecipeWithOpcode( - getOpcode(), ElementCount::getFixed(1), Ctx); + InstructionCost ScalarCost = + getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx); if (isSingleScalar()) return ScalarCost; From 983c8b6b2575c034dc98514a35d0fd9b08d9935e Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Mon, 15 Sep 2025 00:57:31 +0300 Subject: [PATCH 277/734] [RISCV] Remove a couple of custom instruction decoders (NFC) (#158483) These instructions can be decoded automatically. --- .../RISCV/Disassembler/RISCVDisassembler.cpp | 64 +++++++------------ llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td | 6 +- .../lib/Target/RISCV/RISCVInstrInfoZicfiss.td | 2 +- 3 files changed, 29 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index fb5a35daaf58f..ff07122b61378 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -194,12 +194,24 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeGPRX1RegisterClass(MCInst &Inst, + const MCDisassembler *Decoder) { + Inst.addOperand(MCOperand::createReg(RISCV::X1)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeSPRegisterClass(MCInst &Inst, const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createReg(RISCV::X2)); return MCDisassembler::Success; } +static DecodeStatus DecodeGPRX5RegisterClass(MCInst &Inst, + const MCDisassembler *Decoder) { + Inst.addOperand(MCOperand::createReg(RISCV::X5)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { @@ -408,6 
+420,18 @@ static DecodeStatus decodeVMaskReg(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } +static DecodeStatus decodeImmThreeOperand(MCInst &Inst, + const MCDisassembler *Decoder) { + Inst.addOperand(MCOperand::createImm(3)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeImmFourOperand(MCInst &Inst, + const MCDisassembler *Decoder) { + Inst.addOperand(MCOperand::createImm(4)); + return MCDisassembler::Success; +} + template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint32_t Imm, int64_t Address, @@ -579,46 +603,6 @@ static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm, return decodeZcmpRlist(Inst, Imm, Address, Decoder); } -static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint16_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5); - [[maybe_unused]] DecodeStatus Result = - DecodeGPRX1X5RegisterClass(Inst, Rs1, Address, Decoder); - assert(Result == MCDisassembler::Success && "Invalid register"); - return MCDisassembler::Success; -} - -static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - DecodeStatus S = MCDisassembler::Success; - uint32_t Rd1 = fieldFromInstruction(Insn, 7, 5); - uint32_t Rs1 = fieldFromInstruction(Insn, 15, 5); - uint32_t Rd2 = fieldFromInstruction(Insn, 20, 5); - uint32_t UImm2 = fieldFromInstruction(Insn, 25, 2); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rd1, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rd2, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rs1, Address, Decoder))) - return MCDisassembler::Fail; - [[maybe_unused]] DecodeStatus Result = - decodeUImmOperand<2>(Inst, UImm2, Address, Decoder); - assert(Result == MCDisassembler::Success && "Invalid immediate"); - - // Disassemble the final operand which is implicit. 
- unsigned Opcode = Inst.getOpcode(); - bool IsWordOp = (Opcode == RISCV::TH_LWD || Opcode == RISCV::TH_LWUD || - Opcode == RISCV::TH_SWD); - if (IsWordOp) - Inst.addOperand(MCOperand::createImm(3)); - else - Inst.addOperand(MCOperand::createImm(4)); - - return S; -} - #include "RISCVGenDisassemblerTables.inc" namespace { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td index 49c9bdd83d3f6..b37ceaaee9cf4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td @@ -44,6 +44,7 @@ def ImmThreeAsmOperand : AsmOperandClass { def immthree : RISCVOp { let ParserMatchClass = ImmThreeAsmOperand; let OperandType = "OPERAND_THREE"; + let DecoderMethod = "decodeImmThreeOperand"; } def ImmFourAsmOperand : AsmOperandClass { @@ -56,6 +57,7 @@ def ImmFourAsmOperand : AsmOperandClass { def immfour : RISCVOp { let ParserMatchClass = ImmFourAsmOperand; let OperandType = "OPERAND_FOUR"; + let DecoderMethod = "decodeImmFourOperand"; } //===----------------------------------------------------------------------===// @@ -161,9 +163,9 @@ class THLoadPair funct5, string opcodestr, Operand consttype> (ins GPR:$rs1, uimm2:$uimm2, consttype:$const3or4), opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> { bits<2> uimm2; + bits<0> const3or4; let Inst{31-27} = funct5; let Inst{26-25} = uimm2; - let DecoderMethod = "decodeXTHeadMemPair"; let Constraints = "@earlyclobber $rd,@earlyclobber $rs2"; } @@ -173,9 +175,9 @@ class THStorePair funct5, string opcodestr, Operand consttype> (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, consttype:$const3or4), opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> { bits<2> uimm2; + bits<0> const3or4; let Inst{31-27} = funct5; let Inst{26-25} = uimm2; - let DecoderMethod = "decodeXTHeadMemPair"; } let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td index 50ebaa9951979..efd06c29dc99f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td @@ -12,12 +12,12 @@ class RVC_SSInst rs1val, RegisterClass reg_class, string opcodestr> : RVInst16<(outs), (ins reg_class:$rs1), opcodestr, "$rs1", [], InstFormatOther> { + bits<0> rs1; let Inst{15-13} = 0b011; let Inst{12} = 0; let Inst{11-7} = rs1val; let Inst{6-2} = 0b00000; let Inst{1-0} = 0b01; - let DecoderMethod = "decodeCSSPushPopchk"; } //===----------------------------------------------------------------------===// From fb58bc6763573f0a7182290cdbfcec469f34c273 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Mon, 15 Sep 2025 02:49:45 +0300 Subject: [PATCH 278/734] [TableGen][CodeEmitterGen] Cache Target/CGH in class (NFC) (#158517) To avoid passing them to member functions. --- llvm/utils/TableGen/CodeEmitterGen.cpp | 69 ++++++++++++-------------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index 6a57ef6c90b36..588d354d1d293 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -48,30 +48,29 @@ using namespace llvm; namespace { class CodeEmitterGen { - const RecordKeeper &Records; + const RecordKeeper &RK; + CodeGenTarget Target; + const CodeGenHwModes &CGH; public: - CodeEmitterGen(const RecordKeeper &R) : Records(R) {} + explicit CodeEmitterGen(const RecordKeeper &RK); void run(raw_ostream &O); private: int getVariableBit(const std::string &VarName, const BitsInit *BI, int Bit); - std::pair - getInstructionCases(const Record *R, const CodeGenTarget &Target); + std::pair getInstructionCases(const Record *R); void addInstructionCasesForEncoding(const Record *R, const Record *EncodingDef, - const CodeGenTarget &Target, std::string &Case, std::string &BitOffsetCase); bool addCodeToMergeInOperand(const Record *R, 
const BitsInit *BI, const std::string &VarName, std::string &Case, - std::string &BitOffsetCase, - const CodeGenTarget &Target); + std::string &BitOffsetCase); void emitInstructionBaseValues( raw_ostream &O, ArrayRef NumberedInstructions, - const CodeGenTarget &Target, unsigned HwMode = DefaultMode); + unsigned HwMode = DefaultMode); void emitCaseMap(raw_ostream &O, const std::map> &CaseMap); @@ -102,8 +101,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R, const BitsInit *BI, const std::string &VarName, std::string &Case, - std::string &BitOffsetCase, - const CodeGenTarget &Target) { + std::string &BitOffsetCase) { CodeGenInstruction &CGI = Target.getInstruction(R); // Determine if VarName actually contributes to the Inst encoding. @@ -277,8 +275,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R, } std::pair -CodeEmitterGen::getInstructionCases(const Record *R, - const CodeGenTarget &Target) { +CodeEmitterGen::getInstructionCases(const Record *R) { std::string Case, BitOffsetCase; auto Append = [&](const std::string &S) { @@ -287,8 +284,7 @@ CodeEmitterGen::getInstructionCases(const Record *R, }; if (const Record *RV = R->getValueAsOptionalDef("EncodingInfos")) { - const CodeGenHwModes &HWM = Target.getHwModes(); - EncodingInfoByHwMode EBM(RV, HWM); + EncodingInfoByHwMode EBM(RV, CGH); // Invoke the interface to obtain the HwMode ID controlling the // EncodingInfo for the current subtarget. 
This interface will @@ -304,7 +300,7 @@ CodeEmitterGen::getInstructionCases(const Record *R, " case " + itostr(DefaultMode) + ": InstBitsByHw = InstBits"; } else { Case += " case " + itostr(ModeId) + ": InstBitsByHw = InstBits_" + - HWM.getMode(ModeId).Name.str(); + CGH.getMode(ModeId).Name.str(); } Case += "; break;\n"; } @@ -326,20 +322,20 @@ CodeEmitterGen::getInstructionCases(const Record *R, Append(" default: llvm_unreachable(\"Unhandled HwMode\");\n"); for (auto &[ModeId, Encoding] : EBM) { Append(" case " + itostr(ModeId) + ": {\n"); - addInstructionCasesForEncoding(R, Encoding, Target, Case, BitOffsetCase); + addInstructionCasesForEncoding(R, Encoding, Case, BitOffsetCase); Append(" break;\n"); Append(" }\n"); } Append(" }\n"); return {std::move(Case), std::move(BitOffsetCase)}; } - addInstructionCasesForEncoding(R, R, Target, Case, BitOffsetCase); + addInstructionCasesForEncoding(R, R, Case, BitOffsetCase); return {std::move(Case), std::move(BitOffsetCase)}; } void CodeEmitterGen::addInstructionCasesForEncoding( - const Record *R, const Record *EncodingDef, const CodeGenTarget &Target, - std::string &Case, std::string &BitOffsetCase) { + const Record *R, const Record *EncodingDef, std::string &Case, + std::string &BitOffsetCase) { const BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); // Loop over all of the fields in the instruction, determining which are the @@ -354,8 +350,8 @@ void CodeEmitterGen::addInstructionCasesForEncoding( if (RV.isNonconcreteOK() || RV.getValue()->isComplete()) continue; - Success &= addCodeToMergeInOperand(R, BI, RV.getName().str(), Case, - BitOffsetCase, Target); + Success &= + addCodeToMergeInOperand(R, BI, RV.getName().str(), Case, BitOffsetCase); } // Avoid empty switches. 
if (BitOffsetCase.size() == BitOffsetCaseSizeBeforeLoop) @@ -389,19 +385,18 @@ static void emitInstBits(raw_ostream &OS, const APInt &Bits) { void CodeEmitterGen::emitInstructionBaseValues( raw_ostream &O, ArrayRef NumberedInstructions, - const CodeGenTarget &Target, unsigned HwMode) { - const CodeGenHwModes &HWM = Target.getHwModes(); + unsigned HwMode) { if (HwMode == DefaultMode) O << " static const uint64_t InstBits[] = {\n"; else - O << " static const uint64_t InstBits_" - << HWM.getModeName(HwMode, /*IncludeDefault=*/true) << "[] = {\n"; + O << " static const uint64_t InstBits_" << CGH.getModeName(HwMode) + << "[] = {\n"; for (const CodeGenInstruction *CGI : NumberedInstructions) { const Record *R = CGI->TheDef; const Record *EncodingDef = R; if (const Record *RV = R->getValueAsOptionalDef("EncodingInfos")) { - EncodingInfoByHwMode EBM(RV, HWM); + EncodingInfoByHwMode EBM(RV, CGH); if (EBM.hasMode(HwMode)) { EncodingDef = EBM.get(HwMode); } else { @@ -447,29 +442,29 @@ void CodeEmitterGen::emitCaseMap( } } +CodeEmitterGen::CodeEmitterGen(const RecordKeeper &RK) + : RK(RK), Target(RK), CGH(Target.getHwModes()) { + // For little-endian instruction bit encodings, reverse the bit order. + Target.reverseBitsForLittleEndianEncoding(); +} + void CodeEmitterGen::run(raw_ostream &O) { emitSourceFileHeader("Machine Code Emitter", O); - CodeGenTarget Target(Records); - - // For little-endian instruction bit encodings, reverse the bit order - Target.reverseBitsForLittleEndianEncoding(); - ArrayRef EncodedInstructions = Target.getTargetNonPseudoInstructions(); if (Target.hasVariableLengthEncodings()) { - emitVarLenCodeEmitter(Records, O); + emitVarLenCodeEmitter(RK, O); return; } - const CodeGenHwModes &HWM = Target.getHwModes(); // The set of HwModes used by instruction encodings. 
std::set HwModes; BitWidth = 0; for (const CodeGenInstruction *CGI : EncodedInstructions) { const Record *R = CGI->TheDef; if (const Record *RV = R->getValueAsOptionalDef("EncodingInfos")) { - EncodingInfoByHwMode EBM(RV, HWM); + EncodingInfoByHwMode EBM(RV, CGH); for (const auto &[Key, Value] : EBM) { const BitsInit *BI = Value->getValueAsBitsInit("Inst"); BitWidth = std::max(BitWidth, BI->getNumBits()); @@ -498,13 +493,13 @@ void CodeEmitterGen::run(raw_ostream &O) { } // Emit instruction base values - emitInstructionBaseValues(O, EncodedInstructions, Target, DefaultMode); + emitInstructionBaseValues(O, EncodedInstructions, DefaultMode); if (!HwModes.empty()) { // Emit table for instrs whose encodings are controlled by HwModes. for (unsigned HwMode : HwModes) { if (HwMode == DefaultMode) continue; - emitInstructionBaseValues(O, EncodedInstructions, Target, HwMode); + emitInstructionBaseValues(O, EncodedInstructions, HwMode); } // This pointer will be assigned to the HwMode table later. 
@@ -521,7 +516,7 @@ void CodeEmitterGen::run(raw_ostream &O) { std::string InstName = (R->getValueAsString("Namespace") + "::" + R->getName()).str(); std::string Case, BitOffsetCase; - std::tie(Case, BitOffsetCase) = getInstructionCases(R, Target); + std::tie(Case, BitOffsetCase) = getInstructionCases(R); CaseMap[Case].push_back(InstName); BitOffsetCaseMap[BitOffsetCase].push_back(std::move(InstName)); From 7dae0b85b3d152ac45a59ad29f49bf8c8822cf67 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 14 Sep 2025 20:37:26 -0400 Subject: [PATCH 279/734] [gn] port 52dd4b9b7e4 --- .../gn/secondary/llvm/lib/Target/AArch64/Disassembler/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/Disassembler/BUILD.gn index 196e4a6ae6826..c445ef2bacc62 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/Disassembler/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/Disassembler/BUILD.gn @@ -5,7 +5,6 @@ tablegen("AArch64GenDisassemblerTables") { args = [ "-gen-disassembler", "-ignore-non-decodable-operands", - "-ignore-fully-defined-operands", ] td_file = "../AArch64.td" } From 7ee4909256a03c5d6d1c558e5ebe4bc1c8fc2816 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 15 Sep 2025 00:38:00 +0000 Subject: [PATCH 280/734] [gn build] Port c642e2aa61c4 --- llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn index 103954e5756d3..74fcb8427511b 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn @@ -18,6 +18,7 @@ unittest("ClangReplInterpreterTests") { "IncrementalProcessingTest.cpp", "InterpreterExtensionsTest.cpp", "InterpreterTest.cpp", + 
"OutOfProcessInterpreterTests.cpp", ] # Support plugins. From 0e28fd7282492669e106617b52494fa4556bb5c4 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 15 Sep 2025 09:30:08 +0800 Subject: [PATCH 281/734] [RISCV] Check the types are the same for folding (sub 0, (setcc x, 0, setlt)) to (sra x, xlen - 1) (#158179) We should check the type of x is the same as `sub` operation. Otherwise the shift amount xlen -1 will exceed the bit size of x. Fixes https://github.com/llvm/llvm-project/issues/158121. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 ++- llvm/test/CodeGen/RISCV/pr158121.ll | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/pr158121.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f9b484b98739f..5485b916c2031 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15827,7 +15827,8 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, SDValue N1 = N->getOperand(1); // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && - isNullConstant(N1.getOperand(1))) { + isNullConstant(N1.getOperand(1)) && + N1.getValueType() == N1.getOperand(0).getValueType()) { ISD::CondCode CCVal = cast(N1.getOperand(2))->get(); if (CCVal == ISD::SETLT) { SDLoc DL(N); diff --git a/llvm/test/CodeGen/RISCV/pr158121.ll b/llvm/test/CodeGen/RISCV/pr158121.ll new file mode 100644 index 0000000000000..2c018444e9c67 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr158121.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +define i64 @f(ptr %p) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: +; CHECK-NEXT: lb a0, 0(a0) +; CHECK-NEXT: srai a0, a0, 63 +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %conv1 = 
zext i8 %load to i32 + %cmp = icmp ult i32 127, %conv1 + %conv2 = zext i1 %cmp to i32 + %sub = sub nsw i32 0, %conv2 + %conv3 = sext i32 %sub to i64 + ret i64 %conv3 +} From c128f3bba4d60cb3b9a5c08854d7b83fd5317e0f Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 15 Sep 2025 09:42:51 +0800 Subject: [PATCH 282/734] [RISCV] Remove unneeded TODOs from fixed-vectors-shuffle-int.ll. There is a single vslideup.vi already. --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll index eb41ed413a0b4..5683476852683 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll @@ -840,7 +840,6 @@ define <8 x i32> @shuffle_spread3_singlesrc_e32(<8 x i32> %v) { ret <8 x i32> %out } -; TODO: This should be a single vslideup.vi define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e32: ; CHECK: # %bb.0: @@ -937,7 +936,6 @@ define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) { ret <8 x i32> %out } -; TODO: This should be a single vslideup.vi define <8 x i8> @shuffle_decompress_singlesrc_e8(<8 x i8> %v) { ; CHECK-LABEL: shuffle_decompress_singlesrc_e8: ; CHECK: # %bb.0: From fef88d2ef21edc83660968e0134e28dda88d4a11 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 15 Sep 2025 10:10:07 +0800 Subject: [PATCH 283/734] [libclc][NFC] Update README.md to use runtime build (#158283) LLVM_ENABLE_PROJECTS=libclc is deprecated, see https://github.com/llvm/llvm-project/blob/a2a9601ea49a/llvm/CMakeLists.txt#L223-L228 --- libclc/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libclc/README.md b/libclc/README.md index 34f329d861199..4f14066425d2d 100644 --- a/libclc/README.md +++ b/libclc/README.md @@ -31,8 +31,8 @@ more targets is welcome. 
For an in-tree build, Clang must also be built at the same time: ``` -$ cmake /llvm-project/llvm/CMakeLists.txt -DLLVM_ENABLE_PROJECTS="libclc;clang" \ - -DCMAKE_BUILD_TYPE=Release -G Ninja +$ cmake /llvm-project/llvm/CMakeLists.txt -DLLVM_ENABLE_PROJECTS="clang" \ + -DLLVM_ENABLE_RUNTIMES="libclc" -DCMAKE_BUILD_TYPE=Release -G Ninja $ ninja ``` Then install: From 5850c44288cba01c75f8afb7cfcf3a2d47e2d5a4 Mon Sep 17 00:00:00 2001 From: Mingjie Xu Date: Mon, 15 Sep 2025 10:28:40 +0800 Subject: [PATCH 284/734] [InstCombine] Improve `foldDeadPhiWeb` compile time (#158057) The foldDeadPhiWeb function identifies and removes small dead PHI webs, it bails out if the web size exceeds threshold (16). In the current implementation, when there is a phi node has large number of users that most of them are phi nodes, we still push them on the `Stack` even if the number of phi nodes user exceeds the threshold. This patch checks the early stop condition when we push an unvisited phi node on the `Stack`. With this change, the wall duration of total instcombine pass decreased from 523,649.276 ms to 208,687.042 ms in an our internal case. 
--- .../lib/Transforms/InstCombine/InstCombinePHI.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index ed9a0be6981fa..15e7172c6ce12 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -60,17 +60,18 @@ bool InstCombinerImpl::foldDeadPhiWeb(PHINode &PN) { SmallVector Stack; SmallPtrSet Visited; Stack.push_back(&PN); + Visited.insert(&PN); while (!Stack.empty()) { PHINode *Phi = Stack.pop_back_val(); - if (!Visited.insert(Phi).second) - continue; - // Early stop if the set of PHIs is large - if (Visited.size() == 16) - return false; for (User *Use : Phi->users()) { - if (PHINode *PhiUse = dyn_cast(Use)) + if (PHINode *PhiUse = dyn_cast(Use)) { + if (!Visited.insert(PhiUse).second) + continue; + // Early stop if the set of PHIs is large + if (Visited.size() >= 16) + return false; Stack.push_back(PhiUse); - else + } else return false; } } From d5f58a582769af320323cb0c1ea450d8b3962a41 Mon Sep 17 00:00:00 2001 From: Weibo He Date: Mon, 15 Sep 2025 10:51:17 +0800 Subject: [PATCH 285/734] [CoroSplit] Fix use-after-free related to coro.suspend (#156572) Fix #156444 --- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 2 +- .../Transforms/Coroutines/coro-split-invalid.ll | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Coroutines/coro-split-invalid.ll diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index ac93f748ce65c..28a89a8f87dbd 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -356,9 +356,9 @@ void coro::Shape::invalidateCoroutine( // present. 
for (AnyCoroSuspendInst *CS : CoroSuspends) { CS->replaceAllUsesWith(PoisonValue::get(CS->getType())); - CS->eraseFromParent(); if (auto *CoroSave = CS->getCoroSave()) CoroSave->eraseFromParent(); + CS->eraseFromParent(); } CoroSuspends.clear(); diff --git a/llvm/test/Transforms/Coroutines/coro-split-invalid.ll b/llvm/test/Transforms/Coroutines/coro-split-invalid.ll new file mode 100644 index 0000000000000..94fe539697214 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-split-invalid.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; Tests that coro-split correctly invalidate bad coroutines +; RUN: opt < %s -passes='cgscc(coro-split)' -S | FileCheck %s + +define void @pr156444() presplitcoroutine { +; CHECK-LABEL: define void @pr156444( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret void +; +entry: + %0 = call i8 @llvm.coro.suspend(token none, i1 false) + ret void +} From d271ace0850a8a4a24a9f62e6d1970352dd2f6e8 Mon Sep 17 00:00:00 2001 From: thetruestblue Date: Sun, 14 Sep 2025 22:59:24 -0400 Subject: [PATCH 286/734] [Test][ASan][Sanitizer] Make atos symbolizer test asan and simulator only (#158522) Simulator environments run only atos symbolizer rather than falling back to dladdr. Because sanitizer-common does not target simulators, move this test into asan and simulator only to make it predicatable test. 
Original change sha: 0f9bfe0a02ffff077a1a98065069b52744e31723 rdar://108003900 --- .../TestCases/Darwin/atos-symbolized-recover.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) rename compiler-rt/test/{sanitizer_common => asan}/TestCases/Darwin/atos-symbolized-recover.cpp (69%) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Darwin/atos-symbolized-recover.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolized-recover.cpp similarity index 69% rename from compiler-rt/test/sanitizer_common/TestCases/Darwin/atos-symbolized-recover.cpp rename to compiler-rt/test/asan/TestCases/Darwin/atos-symbolized-recover.cpp index 4234e0c9a9af3..08b31af136fa0 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Darwin/atos-symbolized-recover.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolized-recover.cpp @@ -1,12 +1,9 @@ // Check that there is a warning when atos fails to symbolize an address // and that atos continues symbolicating correctly after. -// RUN: %clangxx -O0 %s -o %t +// RUN: %clangxx_asan -O0 %s -o %t // RUN: not %run %t 2>&1 | FileCheck %s - -// This test tests for undefined behavior and is leading to various failures. -// Going to disable to unblock CI and rethink a test for this. rdar://107846128 -// UNSUPPORTED: darwin +// REQUIRES: iossim void bar() { void *invalid_addr = reinterpret_cast(0xDEADBEEF); @@ -19,4 +16,4 @@ int main() { return 0; // CHECK: WARNING: atos failed to symbolize address{{.*}} // CHECK: {{.*}}atos-symbolized-recover.cpp:[[@LINE-3]]{{.*}} -} +} \ No newline at end of file From e2455bfc101bfdd59ac6426c3a804f8b78152fd7 Mon Sep 17 00:00:00 2001 From: Jinjie Huang Date: Mon, 15 Sep 2025 11:30:54 +0800 Subject: [PATCH 287/734] [BOLT][DWARF] Get DWO file via relative path if the CompilationDir does not exist (#154515) In distributed builds, the DWARF CompilationDir is often invalid, causing BOLT to fail when locating DWO files. 
If the default path does not exist, it seems better to consider the DWOName as a relative path in this case. The implementation of this patch will try to search for the DWO file in the following order: 1. CompDirOverride + DWOName (if CompDirOverride specified) 2. CompilationDir + DWOName (if CompilationDir exists) 3. **Current directory + DWOName (relative path as a fallback)** This patch also fixes a crash that occurs when DWOName is an absolute path and a DWP file is provided. --- bolt/lib/Core/BinaryContext.cpp | 19 +++++++++++++++---- bolt/lib/Rewrite/DWARFRewriter.cpp | 9 +++++---- bolt/test/dwo-name-retrieving.test | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) create mode 100755 bolt/test/dwo-name-retrieving.test diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 8e2224b51fa8a..72c72bbaf4a65 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -33,6 +33,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Regex.h" #include #include @@ -1632,11 +1633,20 @@ void BinaryContext::preprocessDWODebugInfo() { DwarfUnit->getUnitDIE().find( {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), ""); - SmallString<16> AbsolutePath; + SmallString<16> AbsolutePath(DWOName); + std::string DWOCompDir = DwarfUnit->getCompilationDir(); if (!opts::CompDirOverride.empty()) { - sys::path::append(AbsolutePath, opts::CompDirOverride); - sys::path::append(AbsolutePath, DWOName); + DWOCompDir = opts::CompDirOverride; + } else if (!sys::fs::exists(DWOCompDir) && sys::fs::exists(DWOName)) { + DWOCompDir = "."; + this->outs() + << "BOLT-WARNING: Debug Fission: Debug Compilation Directory of " + << DWOName + << " does not exist. Relative path will be used to process .dwo " + "files.\n"; } + // Prevent failures when DWOName is already an absolute path. 
+ sys::fs::make_absolute(DWOCompDir, AbsolutePath); DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); if (!DWOCU->isDWOUnit()) { @@ -1644,7 +1654,8 @@ void BinaryContext::preprocessDWODebugInfo() { << "BOLT-WARNING: Debug Fission: DWO debug information for " << DWOName << " was not retrieved and won't be updated. Please check " - "relative path.\n"; + "relative path or use '--comp-dir-override' to specify the base " + "location.\n"; continue; } DWOCUs[*DWOId] = DWOCU; diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 0c1a1bac6c72e..6752489ad562a 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -1846,15 +1846,16 @@ void DWARFRewriter::writeDWOFiles( } std::string CompDir = CU.getCompilationDir(); + SmallString<16> AbsolutePath(DWOName); if (!opts::DwarfOutputPath.empty()) CompDir = opts::DwarfOutputPath.c_str(); else if (!opts::CompDirOverride.empty()) CompDir = opts::CompDirOverride; - - SmallString<16> AbsolutePath; - sys::path::append(AbsolutePath, CompDir); - sys::path::append(AbsolutePath, DWOName); + else if (!sys::fs::exists(CompDir)) + CompDir = "."; + // Prevent failures when DWOName is already an absolute path. + sys::fs::make_absolute(CompDir, AbsolutePath); std::error_code EC; std::unique_ptr TempOut = diff --git a/bolt/test/dwo-name-retrieving.test b/bolt/test/dwo-name-retrieving.test new file mode 100755 index 0000000000000..39193ccc6637a --- /dev/null +++ b/bolt/test/dwo-name-retrieving.test @@ -0,0 +1,19 @@ +## Test DWO retrieval via relative path with a missing CompDir. +## Also, verify no crash for an absolute DWOName path. + +## The case where DWOName is a relative path, and debug compilation directory does not exist. 
+# RUN: rm -rf %t && mkdir -p %t && cd %t +# RUN: %clang %cflags -g -gsplit-dwarf -fdebug-compilation-dir=/path/does/not/exist %p/Inputs/hello.c -o main.exe +# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -update-debug-sections 2>&1 | FileCheck %s -check-prefix=DWO-NAME-REL + +# DWO-NAME-REL: BOLT-WARNING: Debug Fission: Debug Compilation Directory of main.exe-hello.dwo does not exist. +# DWO-NAME-REL-NOT: Debug Fission: DWO debug information for + +## The case where DWOName is a absolute path, and a dwp file is provided. +# RUN: %clang %cflags -g -gsplit-dwarf %p/Inputs/hello.c -o %t/main.exe +# RUN: llvm-dwp -e %t/main.exe -o %t/main.exe.dwp +# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -update-debug-sections -dwp=%t/main.exe.dwp 2>&1 | FileCheck %s -check-prefix=DWO-NAME-ABS + +# DWO-NAME-ABS-NOT: BOLT-WARNING: Debug Fission: Debug Compilation Directory of {{.*}}/main.exe-hello.dwo does not exist. +# DWO-NAME-ABS-NOT: Debug Fission: DWO debug information for +# DWO-NAME-ABS-NOT: Assertion `FD >= 0 && "File not yet open!"' failed. From b15719365861502cd1c6d216ad4a0425ee6431ea Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 20:31:47 -0700 Subject: [PATCH 288/734] [ADT] Handle uint8_t and uint16_t in countr_zero (#158518) Without this patch, the uint8_t and uint16_t cases are sent to the fallback route. This patch fixes that by relaxing the "if" condition. While it's hard to test that the correct control path is taken within countr_zero, this patch adds a few tests just to verify the correctness on uint8_t and uint16_t inputs. 
--- llvm/include/llvm/ADT/bit.h | 2 +- llvm/unittests/ADT/BitTest.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h index d6e33c3e6133a..2ca9b43519740 100644 --- a/llvm/include/llvm/ADT/bit.h +++ b/llvm/include/llvm/ADT/bit.h @@ -161,7 +161,7 @@ template [[nodiscard]] int countr_zero(T Val) { return std::numeric_limits::digits; // Use the intrinsic if available. - if constexpr (sizeof(T) == 4) { + if constexpr (sizeof(T) <= 4) { #if __has_builtin(__builtin_ctz) || defined(__GNUC__) return __builtin_ctz(Val); #elif defined(_MSC_VER) diff --git a/llvm/unittests/ADT/BitTest.cpp b/llvm/unittests/ADT/BitTest.cpp index 2377ce3b78261..88ae36c44bdb9 100644 --- a/llvm/unittests/ADT/BitTest.cpp +++ b/llvm/unittests/ADT/BitTest.cpp @@ -297,6 +297,14 @@ TEST(BitTest, CountrZero) { EXPECT_EQ(1, llvm::countr_zero(NZ16)); EXPECT_EQ(1, llvm::countr_zero(NZ32)); EXPECT_EQ(1, llvm::countr_zero(NZ64)); + + EXPECT_EQ(0, llvm::countr_zero(uint8_t(1))); + EXPECT_EQ(3, llvm::countr_zero(uint8_t(8))); + EXPECT_EQ(7, llvm::countr_zero(uint8_t(128))); + + EXPECT_EQ(0, llvm::countr_zero(uint16_t(1))); + EXPECT_EQ(8, llvm::countr_zero(uint16_t(256))); + EXPECT_EQ(15, llvm::countr_zero(uint16_t(32768))); } TEST(BitTest, CountlOne) { From 1e3dd5ef29464b86005705bebec721ac5933bd85 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 20:31:55 -0700 Subject: [PATCH 289/734] [llvm] Use std::bool_constant (NFC) (#158520) This patch replaces, std::integral_constant with std::bool_constant for brevity. Note that std::bool_constant was introduced as part of C++17. There are cases where we could replace EXPECT_EQ(false, ...) with EXPECT_FALSE(...), but I'm not doing that in this patch to avoid doing multiple things in one patch. 
--- llvm/include/llvm/Support/YAMLTraits.h | 20 +++++++++----------- llvm/lib/IR/Metadata.cpp | 5 ++--- llvm/unittests/ADT/StringRefTest.cpp | 5 ++--- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index cce36a253777b..bbc12a2fcbe7a 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -668,17 +668,15 @@ inline QuotingType needsQuotes(StringRef S, bool ForcePreserveAsString = true) { template struct missingTraits - : public std::integral_constant::value && - !has_ScalarBitSetTraits::value && - !has_ScalarTraits::value && - !has_BlockScalarTraits::value && - !has_TaggedScalarTraits::value && - !has_MappingTraits::value && - !has_SequenceTraits::value && - !has_CustomMappingTraits::value && - !has_DocumentListTraits::value && - !has_PolymorphicTraits::value> {}; + : public std::bool_constant< + !has_ScalarEnumerationTraits::value && + !has_ScalarBitSetTraits::value && !has_ScalarTraits::value && + !has_BlockScalarTraits::value && + !has_TaggedScalarTraits::value && + !has_MappingTraits::value && + !has_SequenceTraits::value && !has_CustomMappingTraits::value && + !has_DocumentListTraits::value && + !has_PolymorphicTraits::value> {}; template struct validatedMappingTraits diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 1157cbe6bbc1b..fc78a5b299f49 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1007,8 +1007,7 @@ MDNode *MDNode::uniquify() { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ case CLASS##Kind: { \ CLASS *SubclassThis = cast(this); \ - std::integral_constant::value> \ - ShouldRecalculateHash; \ + std::bool_constant::value> ShouldRecalculateHash; \ dispatchRecalculateHash(SubclassThis, ShouldRecalculateHash); \ return uniquifyImpl(SubclassThis, getContext().pImpl->CLASS##s); \ } @@ -1065,7 +1064,7 @@ void MDNode::storeDistinctInContext() { llvm_unreachable("Invalid 
subclass of MDNode"); #define HANDLE_MDNODE_LEAF(CLASS) \ case CLASS##Kind: { \ - std::integral_constant::value> ShouldResetHash; \ + std::bool_constant::value> ShouldResetHash; \ dispatchResetHash(cast(this), ShouldResetHash); \ break; \ } diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp index d5f8dc41cdb6b..1ace29e96dbb8 100644 --- a/llvm/unittests/ADT/StringRefTest.cpp +++ b/llvm/unittests/ADT/StringRefTest.cpp @@ -1124,14 +1124,13 @@ TEST(StringRefTest, StringLiteral) { constexpr StringRef StringRefs[] = {"Foo", "Bar"}; EXPECT_EQ(StringRef("Foo"), StringRefs[0]); EXPECT_EQ(3u, (std::integral_constant::value)); - EXPECT_EQ(false, - (std::integral_constant::value)); + EXPECT_EQ(false, (std::bool_constant::value)); EXPECT_EQ(StringRef("Bar"), StringRefs[1]); constexpr StringLiteral Strings[] = {"Foo", "Bar"}; EXPECT_EQ(StringRef("Foo"), Strings[0]); EXPECT_EQ(3u, (std::integral_constant::value)); - EXPECT_EQ(false, (std::integral_constant::value)); + EXPECT_EQ(false, (std::bool_constant::value)); EXPECT_EQ(StringRef("Bar"), Strings[1]); } From c44e015b49e832c9f3749c33cf5c9d5aacaf60a4 Mon Sep 17 00:00:00 2001 From: William Moses Date: Sun, 14 Sep 2025 22:50:42 -0500 Subject: [PATCH 290/734] [SimplifyCFG] Refine metadata handling during instruction hoisting (#158448) Co-authored-by: Nikita Popov --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- ...no-drop-debug-loc-when-speculating-call.ll | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/no-drop-debug-loc-when-speculating-call.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 5a842f9b49c1b..a1f759dd1df83 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3392,7 +3392,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI, // hoisting above. 
for (auto &I : make_early_inc_range(*ThenBB)) { if (!SpeculatedStoreValue || &I != SpeculatedStore) { - I.setDebugLoc(DebugLoc::getDropped()); + I.dropLocation(); } I.dropUBImplyingAttrsAndMetadata(); diff --git a/llvm/test/Transforms/SimplifyCFG/no-drop-debug-loc-when-speculating-call.ll b/llvm/test/Transforms/SimplifyCFG/no-drop-debug-loc-when-speculating-call.ll new file mode 100644 index 0000000000000..dd1db41632c98 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/no-drop-debug-loc-when-speculating-call.ll @@ -0,0 +1,43 @@ +; RUN: opt -S -o - %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 | FileCheck %s + + +declare i1 @make_condition() + +define i1 @specfn() readnone nounwind speculatable { + ret i1 true +} + +; CHECK-LABEL: @test1( +; CHECK: call i1 @specfn(), !dbg +; CHECK: select i1 +define void @test1(i1 %cond) !dbg !6 { +start: + br i1 %cond, label %then, label %else, !dbg !9 + +then: ; preds = %start + %sres = call i1 @specfn(), !dbg !8 + br label %else, !dbg !11 + +else: ; preds = %then, %start + %phi = phi i1 [ %cond, %start ], [ %sres, %then ], !dbg !12 + ret void, !dbg !13 +} + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !4} +!llvm.module.flags = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "test.ll", directory: "/") +!2 = !{} +!3 = !{i32 6} +!4 = !{i32 0} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DISubprogram(name: "test1", linkageName: "test1", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 1, column: 1, scope: !6) +!9 = !DILocation(line: 2, column: 1, scope: !6) +!10 = !DILocation(line: 3, column: 2, scope: !6) +!11 = !DILocation(line: 4, column: 2, scope: !6) +!12 = !DILocation(line: 5, column: 3, scope: !6) +!13 = 
!DILocation(line: 6, column: 3, scope: !6) From 65ad21d730d25789454d18e811f8ff5db79cb5d4 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 15 Sep 2025 12:14:04 +0800 Subject: [PATCH 291/734] [RISCV] Handle recurrences in RISCVVLOptimizer (#151285) After #144666 we now support vectorizing loops with induction variables with EVL tail folding. The induction updates don't use VP intrinsics to avoid VL toggles but instead rely on RISCVVLOptimizer. However RISCVVLOptimizer can't reason about cycles or recurrences today, which means we are left with a VL toggle to VLMAX: # %bb.1: # %for.body.preheader li a2, 0 vsetvli a3, zero, e32, m2, ta, ma vid.v v8 .LBB0_2: # %vector.body # =>This Inner Loop Header: Depth=1 sub a3, a1, a2 sh2add a4, a2, a0 vsetvli a3, a3, e32, m2, ta, ma vle32.v v10, (a4) add a2, a2, a3 vadd.vv v10, v10, v8 vse32.v v10, (a4) vsetvli a4, zero, e32, m2, ta, ma vadd.vx v8, v8, a3 bne a2, a1, .LBB0_2 This patch teaches RISCVVLOptimizer to reason about recurrences so we can remove the VLMAX toggle: # %bb.1: # %for.body.preheader li a2, 0 vsetvli a3, zero, e32, m2, ta, ma vid.v v8 .LBB0_2: # %vector.body # =>This Inner Loop Header: Depth=1 sub a3, a1, a2 sh2add a4, a2, a0 vsetvli a3, a3, e32, m2, ta, ma vle32.v v10, (a4) add a2, a2, a3 vadd.vv v10, v10, v8 vse32.v v10, (a4) vadd.vx v8, v8, a3 bne a2, a1, .LBB0_2 With this we remove a significant number of VL toggles and vsetvli instructions across llvm-test-suite and SPEC CPU 2017 with tail folding enabled, since it affects every loop with an induction variable. This builds upon the work in #124530 where we started computing what VL each instruction demanded, and generalizes it to an optimistic sparse dataflow analysis: - We begin by optimistically assuming no VL is used by any instruction, and push instructions onto the worklist starting from the bottom. 
- For each instruction on the worklist we apply the transfer function, which propagates the VL needed by that instruction upwards to the instructions it uses. If a use's demanded VL changes, it's added to the worklist. - Eventually this converges to a fixpoint when all uses have been processed and every demanded VL has been propagated throughout the entire use-def chain. Only after this is the DemandedVL map accurate. Some implementation details: - The roots are stores (or other unsupported instructions not in `isSupportedInstr`) or copies to physical registers (they fail the `any_of(MI.defs(), isPhysical)` check) - This patch untangles `getMinimumVLForUser` and `checkUsers`. `getMinimumVLForUser` now returns how many lanes of an operand are read by an instruction, whilst `checkUsers` checks that an instruction and its users have compatible EEW/EMULs. - The `DemandedVL` struct was added so that we have a default constructor of 0 for `DenseMap DemandedVLs`, so we don't need to check if a key exists when looking things up. There was no measurable compile time impact on llvm-test-suite or SPEC CPU 2017. The analysis will always terminate, there are more details in this EuroLLVM talk here: https://www.youtube.com/watch?v=Mfb5fRSdJAc Fixes #149354 --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 169 +++++++++++------- .../CodeGen/RISCV/rvv/reproducer-pr146855.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 87 +++++++++ llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 71 ++++++++ llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll | 4 +- 5 files changed, 271 insertions(+), 64 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 29526cf5a5273..a1134663c0e7a 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -10,9 +10,19 @@ // instructions are inserted. 
// // The purpose of this optimization is to make the VL argument, for instructions -// that have a VL argument, as small as possible. This is implemented by -// visiting each instruction in reverse order and checking that if it has a VL -// argument, whether the VL can be reduced. +// that have a VL argument, as small as possible. +// +// This is split into a sparse dataflow analysis where we determine what VL is +// demanded by each instruction first, and then afterwards try to reduce the VL +// of each instruction if it demands less than its VL operand. +// +// The analysis is explained in more detail in the 2025 EuroLLVM Developers' +// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL +// Optimizer", which is available on YouTube at +// https://www.youtube.com/watch?v=Mfb5fRSdJAc +// +// The slides for the talk are available at +// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf // //===---------------------------------------------------------------------===// @@ -30,6 +40,27 @@ using namespace llvm; namespace { +/// Wrapper around MachineOperand that defaults to immediate 0. 
+struct DemandedVL { + MachineOperand VL; + DemandedVL() : VL(MachineOperand::CreateImm(0)) {} + DemandedVL(MachineOperand VL) : VL(VL) {} + static DemandedVL vlmax() { + return DemandedVL(MachineOperand::CreateImm(RISCV::VLMaxSentinel)); + } + bool operator!=(const DemandedVL &Other) const { + return !VL.isIdenticalTo(Other.VL); + } + + DemandedVL max(const DemandedVL &X) const { + if (RISCV::isVLKnownLE(VL, X.VL)) + return X; + if (RISCV::isVLKnownLE(X.VL, VL)) + return *this; + return DemandedVL::vlmax(); + } +}; + class RISCVVLOptimizer : public MachineFunctionPass { const MachineRegisterInfo *MRI; const MachineDominatorTree *MDT; @@ -51,17 +82,25 @@ class RISCVVLOptimizer : public MachineFunctionPass { StringRef getPassName() const override { return PASS_NAME; } private: - std::optional - getMinimumVLForUser(const MachineOperand &UserOp) const; - /// Returns the largest common VL MachineOperand that may be used to optimize - /// MI. Returns std::nullopt if it failed to find a suitable VL. - std::optional checkUsers(const MachineInstr &MI) const; + DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const; + /// Returns true if the users of \p MI have compatible EEWs and SEWs. + bool checkUsers(const MachineInstr &MI) const; bool tryReduceVL(MachineInstr &MI) const; bool isCandidate(const MachineInstr &MI) const; + void transfer(const MachineInstr &MI); /// For a given instruction, records what elements of it are demanded by /// downstream users. - DenseMap> DemandedVLs; + DenseMap DemandedVLs; + SetVector Worklist; + + /// \returns all vector virtual registers that \p MI uses. + auto virtual_vec_uses(const MachineInstr &MI) const { + return make_filter_range(MI.uses(), [this](const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isVirtual() && + RISCVRegisterInfo::isRVVRegClass(MRI->getRegClass(MO.getReg())); + }); + } }; /// Represents the EMUL and EEW of a MachineOperand. 
@@ -847,10 +886,15 @@ static std::optional getOperandInfo(const MachineOperand &MO) { return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW); } +static bool isTupleInsertInstr(const MachineInstr &MI); + /// Return true if this optimization should consider MI for VL reduction. This /// white-list approach simplifies this optimization for instructions that may /// have more complex semantics with relation to how it uses VL. static bool isSupportedInstr(const MachineInstr &MI) { + if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI)) + return true; + const RISCVVPseudosTable::PseudoInfo *RVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); @@ -1348,21 +1392,24 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { return true; } -std::optional +DemandedVL RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { const MachineInstr &UserMI = *UserOp.getParent(); const MCInstrDesc &Desc = UserMI.getDesc(); + if (UserMI.isPHI() || UserMI.isFullCopy() || isTupleInsertInstr(UserMI)) + return DemandedVLs.lookup(&UserMI); + if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) { LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that" " use VLMAX\n"); - return std::nullopt; + return DemandedVL::vlmax(); } if (RISCVII::readsPastVL( TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) { LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); - return std::nullopt; + return DemandedVL::vlmax(); } unsigned VLOpNum = RISCVII::getVLOpNum(Desc); @@ -1376,11 +1423,10 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { if (UserOp.isTied()) { assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() && RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc())); - auto DemandedVL = DemandedVLs.lookup(&UserMI); - if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) { + if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) { LLVM_DEBUG(dbgs() 
<< " Abort because user is passthru in " "instruction with demanded tail\n"); - return std::nullopt; + return DemandedVL::vlmax(); } } @@ -1393,11 +1439,8 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { // If we know the demanded VL of UserMI, then we can reduce the VL it // requires. - if (auto DemandedVL = DemandedVLs.lookup(&UserMI)) { - assert(isCandidate(UserMI)); - if (RISCV::isVLKnownLE(*DemandedVL, VLOp)) - return DemandedVL; - } + if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) + return DemandedVLs.lookup(&UserMI); return VLOp; } @@ -1450,22 +1493,23 @@ static bool isSegmentedStoreInstr(const MachineInstr &MI) { } } -std::optional -RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { - std::optional CommonVL; - SmallSetVector Worklist; +bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { + if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI)) + return true; + + SmallSetVector OpWorklist; SmallPtrSet PHISeen; for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) - Worklist.insert(&UserOp); + OpWorklist.insert(&UserOp); - while (!Worklist.empty()) { - MachineOperand &UserOp = *Worklist.pop_back_val(); + while (!OpWorklist.empty()) { + MachineOperand &UserOp = *OpWorklist.pop_back_val(); const MachineInstr &UserMI = *UserOp.getParent(); LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n"); if (UserMI.isFullCopy() && UserMI.getOperand(0).getReg().isVirtual()) { LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n"); - Worklist.insert_range(llvm::make_pointer_range( + OpWorklist.insert_range(llvm::make_pointer_range( MRI->use_operands(UserMI.getOperand(0).getReg()))); continue; } @@ -1481,8 +1525,8 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { // whole register group). 
if (!isTupleInsertInstr(CandidateMI) && !isSegmentedStoreInstr(CandidateMI)) - return std::nullopt; - Worklist.insert(&UseOp); + return false; + OpWorklist.insert(&UseOp); } continue; } @@ -1492,28 +1536,14 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { if (!PHISeen.insert(&UserMI).second) continue; LLVM_DEBUG(dbgs() << " Peeking through uses of PHI\n"); - Worklist.insert_range(llvm::make_pointer_range( + OpWorklist.insert_range(llvm::make_pointer_range( MRI->use_operands(UserMI.getOperand(0).getReg()))); continue; } - auto VLOp = getMinimumVLForUser(UserOp); - if (!VLOp) - return std::nullopt; - - // Use the largest VL among all the users. If we cannot determine this - // statically, then we cannot optimize the VL. - if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) { - CommonVL = *VLOp; - LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n"); - } else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) { - LLVM_DEBUG(dbgs() << " Abort because cannot determine a common VL\n"); - return std::nullopt; - } - if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) { LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n"); - return std::nullopt; + return false; } std::optional ConsumerInfo = getOperandInfo(UserOp); @@ -1522,7 +1552,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n"); LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n"); LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n"); - return std::nullopt; + return false; } if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) { @@ -1531,11 +1561,11 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { << " Abort due to incompatible information for EMUL or EEW.\n"); LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n"); LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n"); - return std::nullopt; + return false; } } - return CommonVL; + return true; 
} bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { @@ -1551,9 +1581,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { return false; } - auto CommonVL = DemandedVLs.lookup(&MI); - if (!CommonVL) - return false; + auto *CommonVL = &DemandedVLs.at(&MI).VL; assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) && "Expected VL to be an Imm or virtual Reg"); @@ -1564,7 +1592,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg()); if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) && !MDT->dominates(VLMI, &MI)) - CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc())); + CommonVL = &VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc())); } if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) { @@ -1599,6 +1627,24 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { return true; } +static bool isPhysical(const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isPhysical(); +} + +/// Look through \p MI's operands and propagate what it demands to its uses. +void RISCVVLOptimizer::transfer(const MachineInstr &MI) { + if (!isSupportedInstr(MI) || !checkUsers(MI) || any_of(MI.defs(), isPhysical)) + DemandedVLs[&MI] = DemandedVL::vlmax(); + + for (const MachineOperand &MO : virtual_vec_uses(MI)) { + const MachineInstr *Def = MRI->getVRegDef(MO.getReg()); + DemandedVL Prev = DemandedVLs[Def]; + DemandedVLs[Def] = DemandedVLs[Def].max(getMinimumVLForUser(MO)); + if (DemandedVLs[Def] != Prev) + Worklist.insert(Def); + } +} + bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -1614,15 +1660,18 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { assert(DemandedVLs.empty()); - // For each instruction that defines a vector, compute what VL its - // downstream users demand. + // For each instruction that defines a vector, propagate the VL it + // uses to its inputs. 
for (MachineBasicBlock *MBB : post_order(&MF)) { assert(MDT->isReachableFromEntry(MBB)); - for (MachineInstr &MI : reverse(*MBB)) { - if (!isCandidate(MI)) - continue; - DemandedVLs.insert({&MI, checkUsers(MI)}); - } + for (MachineInstr &MI : reverse(*MBB)) + Worklist.insert(&MI); + } + + while (!Worklist.empty()) { + const MachineInstr *MI = Worklist.front(); + Worklist.remove(MI); + transfer(*MI); } // Then go through and see if we can reduce the VL of any instructions to diff --git a/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll b/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll index cca00bf58063d..2d64defe8c7b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll +++ b/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll @@ -6,7 +6,7 @@ target triple = "riscv64-unknown-linux-gnu" define i32 @_ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_( %wide.load, %0, %1, %2, %3) #0 { ; CHECK-LABEL: _ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: vmv.v.i v10, 0 @@ -14,7 +14,7 @@ define i32 @_ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_( %wi ; CHECK-NEXT: vmv.v.i v14, 0 ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: vmv2r.v v16, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll index 20608cd6bed87..3844b984455c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -238,3 +238,90 @@ define void @segmented_store_insert_subreg( %v0, , 3) %t2, ptr %p, iXLen %vl, iXLen 5) ret void } + +define void @recurrence( %v, ptr %p, iXLen %n, iXLen %vl) { +; CHECK-LABEL: recurrence: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: .LBB16_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vadd.vv v10, v10, v8 +; CHECK-NEXT: bnez a1, .LBB16_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret +entry: + br label %loop +loop: + %iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ] + %phi = phi [ zeroinitializer, %entry ], [ %x, %loop ] + %x = add %phi, %v + %iv.next = add iXLen %iv, 1 + %done = icmp eq iXLen %iv.next, %n + br i1 %done, label %exit, label %loop +exit: + call void @llvm.riscv.vse( %x, ptr %p, iXLen %vl) + ret void +} + +define void @recurrence_vleff( %v, ptr %p, iXLen %n, iXLen %vl) { +; CHECK-LABEL: recurrence_vleff: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB17_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v10, (a3) +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, 4 +; CHECK-NEXT: bnez a1, .LBB17_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: ret +entry: + br label %loop +loop: + %iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ] + %phi = phi [ zeroinitializer, %entry ], [ %y, %loop ] + %gep = getelementptr i32, ptr %p, iXLen %iv + %vleff = call { , iXLen } @llvm.riscv.vleff( poison, ptr %gep, iXLen %vl) + %vleff.x = extractvalue { , iXLen } %vleff, 0 + %vleff.vl = extractvalue { , iXLen } %vleff, 1 + %y = add %phi, %vleff.x + call void @llvm.riscv.vse( %y, ptr %p, iXLen %vleff.vl) + %iv.next = add iXLen %iv, 1 + %done = icmp eq iXLen %iv.next, %n + br i1 %done, label %exit, label %loop +exit: + ret void +} + +define @join( %v, i1 %cond, iXLen %vl) { +; CHECK-LABEL: join: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
andi a0, a0, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 1 +; CHECK-NEXT: beqz a0, .LBB18_2 +; CHECK-NEXT: # %bb.1: # %foo +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB18_2: # %bar +; CHECK-NEXT: vadd.vi v8, v8, 2 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd( poison, %v, iXLen 1, iXLen -1) + br i1 %cond, label %foo, label %bar +foo: + %b = call @llvm.riscv.vadd( poison, %a, iXLen 1, iXLen 1) + ret %b +bar: + %c = call @llvm.riscv.vadd( poison, %a, iXLen 2, iXLen 2) + ret %c +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index 086b3203ed5b0..9174b98de0aa9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -699,3 +699,74 @@ body: | %11:vr = PseudoVADD_VV_M1 $noreg, %2, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ $v10 = COPY %11 PseudoRET implicit $v10 +... +--- +name: recurrence +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: recurrence + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x8 + ; CHECK-NEXT: %start:vr = PseudoVMV_V_I_M1 $noreg, 0, %avl, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + ; CHECK-NEXT: %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: BNE $noreg, $noreg, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: PseudoVSE8_V_M1 %inc, $noreg, %avl, 3 /* e8 */ + bb.0: + liveins: $x8 + %avl:gprnox0 = COPY $x8 + %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3, /* ta, ma */ + PseudoBR %bb.1 + bb.1: + %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + %inc:vr = 
PseudoVADD_VI_M1 $noreg, %phi, 1, -1, 3 /* e8 */, 3 /* ta, ma */ + BNE $noreg, $noreg, %bb.1 + bb.2: + PseudoVSE8_V_M1 %inc, $noreg, %avl, 3 /* e8 */ +... +--- +name: recurrence_cant_reduce +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: recurrence_cant_reduce + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x8, $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl1:gprnox0 = COPY $x8 + ; CHECK-NEXT: %avl2:gprnox0 = COPY $x8 + ; CHECK-NEXT: %start:vr = PseudoVMV_V_I_M1 $noreg, 0, %avl1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + ; CHECK-NEXT: %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl1, 3 /* e8 */, 3 /* ta, ma */ + ; CHECK-NEXT: BNE $noreg, $noreg, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: PseudoVSE8_V_M1 %inc, $noreg, %avl2, 3 /* e8 */ + bb.0: + liveins: $x8, $x9 + %avl1:gprnox0 = COPY $x8 + %avl2:gprnox0 = COPY $x8 + %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3, /* ta, ma */ + PseudoBR %bb.1 + bb.1: + %phi:vr = PHI %start, %bb.0, %inc, %bb.1 + %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl1, 3 /* e8 */, 3 /* ta, ma */ + BNE $noreg, $noreg, %bb.1 + bb.2: + PseudoVSE8_V_M1 %inc, $noreg, %avl2, 3 /* e8 */ +... diff --git a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll index 4b9f9a0579c48..3a05477e64ccd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll @@ -11,7 +11,7 @@ ; which was responsible for speeding it up. define @same_vl_imm( %passthru, %a, %b) { - ; CHECK: User VL is: 4 + ; CHECK: Trying to reduce VL for %{{.+}}:vrm2 = PseudoVADD_VV_M2 ; CHECK: Abort due to CommonVL == VLOp, no point in reducing. 
%v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, i64 4) %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, i64 4) @@ -19,7 +19,7 @@ define @same_vl_imm( %passthru, @same_vl_reg( %passthru, %a, %b, i64 %vl) { - ; CHECK: User VL is: %3:gprnox0 + ; CHECK: Trying to reduce VL for %{{.+}}:vrm2 = PseudoVADD_VV_M2 ; CHECK: Abort due to CommonVL == VLOp, no point in reducing. %v = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %a, %b, i64 %vl) %w = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %v, %a, i64 %vl) From 6a4f66476ff59a32898891345bc07547e71028ec Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Mon, 15 Sep 2025 00:45:30 -0400 Subject: [PATCH 292/734] [MLIR][Python] restore `liveModuleMap` (#158506) There are cases where the same module can have multiple references (via `PyModule::forModule` via `PyModule::createFromCapsule`) and thus when `PyModule`s get gc'd `mlirModuleDestroy` can get called multiple times for the same actual underlying `mlir::Module` (i.e., double free). So we do actually need a "liveness map" for modules. Note, if `type_caster::from_cpp` weren't a thing we could guarantree this never happened except explicitly when users called `PyModule::createFromCapsule`. 
--- mlir/lib/Bindings/Python/IRCore.cpp | 42 ++++++++++++++++++++--------- mlir/lib/Bindings/Python/IRModule.h | 12 +++++++++ mlir/test/python/ir/module.py | 8 +++--- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index 8273a9346e5dd..10360e448858c 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -1079,23 +1079,38 @@ PyLocation &DefaultingPyLocation::resolve() { PyModule::PyModule(PyMlirContextRef contextRef, MlirModule module) : BaseContextObject(std::move(contextRef)), module(module) {} -PyModule::~PyModule() { mlirModuleDestroy(module); } +PyModule::~PyModule() { + nb::gil_scoped_acquire acquire; + auto &liveModules = getContext()->liveModules; + assert(liveModules.count(module.ptr) == 1 && + "destroying module not in live map"); + liveModules.erase(module.ptr); + mlirModuleDestroy(module); +} PyModuleRef PyModule::forModule(MlirModule module) { MlirContext context = mlirModuleGetContext(module); PyMlirContextRef contextRef = PyMlirContext::forContext(context); - // Create. - PyModule *unownedModule = new PyModule(std::move(contextRef), module); - // Note that the default return value policy on cast is `automatic_reference`, - // which means "does not take ownership, does not call delete/dtor". - // We use `take_ownership`, which means "Python will call the C++ destructor - // and delete operator when the Python wrapper is garbage collected", because - // MlirModule actually wraps OwningOpRef (see mlirModuleCreateParse - // etc). - nb::object pyRef = nb::cast(unownedModule, nb::rv_policy::take_ownership); - unownedModule->handle = pyRef; - return PyModuleRef(unownedModule, std::move(pyRef)); + nb::gil_scoped_acquire acquire; + auto &liveModules = contextRef->liveModules; + auto it = liveModules.find(module.ptr); + if (it == liveModules.end()) { + // Create. 
+ PyModule *unownedModule = new PyModule(std::move(contextRef), module); + // Note that the default return value policy on cast is automatic_reference, + // which does not take ownership (delete will not be called). + // Just be explicit. + nb::object pyRef = nb::cast(unownedModule, nb::rv_policy::take_ownership); + unownedModule->handle = pyRef; + liveModules[module.ptr] = + std::make_pair(unownedModule->handle, unownedModule); + return PyModuleRef(unownedModule, std::move(pyRef)); + } + // Use existing. + PyModule *existing = it->second.second; + nb::object pyRef = nb::borrow(it->second.first); + return PyModuleRef(existing, std::move(pyRef)); } nb::object PyModule::createFromCapsule(nb::object capsule) { @@ -2084,6 +2099,8 @@ PyInsertionPoint PyInsertionPoint::after(PyOperationBase &op) { return PyInsertionPoint{block, std::move(nextOpRef)}; } +size_t PyMlirContext::getLiveModuleCount() { return liveModules.size(); } + nb::object PyInsertionPoint::contextEnter(nb::object insertPoint) { return PyThreadContextEntry::pushInsertionPoint(insertPoint); } @@ -2923,6 +2940,7 @@ void mlir::python::populateIRCore(nb::module_ &m) { PyMlirContextRef ref = PyMlirContext::forContext(self.get()); return ref.releaseObject(); }) + .def("_get_live_module_count", &PyMlirContext::getLiveModuleCount) .def_prop_ro(MLIR_PYTHON_CAPI_PTR_ATTR, &PyMlirContext::getCapsule) .def(MLIR_PYTHON_CAPI_FACTORY_ATTR, &PyMlirContext::createFromCapsule) .def("__enter__", &PyMlirContext::contextEnter) diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h index 1d1ff29533f98..28b885f136fe0 100644 --- a/mlir/lib/Bindings/Python/IRModule.h +++ b/mlir/lib/Bindings/Python/IRModule.h @@ -218,6 +218,10 @@ class PyMlirContext { /// Gets the count of live context objects. Used for testing. static size_t getLiveCount(); + /// Gets the count of live modules associated with this context. + /// Used for testing. 
+ size_t getLiveModuleCount(); + /// Enter and exit the context manager. static nanobind::object contextEnter(nanobind::object context); void contextExit(const nanobind::object &excType, @@ -244,6 +248,14 @@ class PyMlirContext { static nanobind::ft_mutex live_contexts_mutex; static LiveContextMap &getLiveContexts(); + // Interns all live modules associated with this context. Modules tracked + // in this map are valid. When a module is invalidated, it is removed + // from this map, and while it still exists as an instance, any + // attempt to access it will raise an error. + using LiveModuleMap = + llvm::DenseMap>; + LiveModuleMap liveModules; + bool emitErrorDiagnostics = false; MlirContext context; diff --git a/mlir/test/python/ir/module.py b/mlir/test/python/ir/module.py index ad4c9340a6c82..33959bea9ffb6 100644 --- a/mlir/test/python/ir/module.py +++ b/mlir/test/python/ir/module.py @@ -121,6 +121,7 @@ def testRoundtripBinary(): def testModuleOperation(): ctx = Context() module = Module.parse(r"""module @successfulParse {}""", ctx) + assert ctx._get_live_module_count() == 1 op1 = module.operation # CHECK: module @successfulParse print(op1) @@ -145,6 +146,7 @@ def testModuleOperation(): op1 = None op2 = None gc.collect() + assert ctx._get_live_module_count() == 0 # CHECK-LABEL: TEST: testModuleCapsule @@ -152,17 +154,17 @@ def testModuleOperation(): def testModuleCapsule(): ctx = Context() module = Module.parse(r"""module @successfulParse {}""", ctx) + assert ctx._get_live_module_count() == 1 # CHECK: "mlir.ir.Module._CAPIPtr" module_capsule = module._CAPIPtr print(module_capsule) module_dup = Module._CAPICreate(module_capsule) - assert module is not module_dup + assert module is module_dup assert module == module_dup - module._clear_mlir_module() - assert module != module_dup assert module_dup.context is ctx # Gc and verify destructed. 
module = None module_capsule = None module_dup = None gc.collect() + assert ctx._get_live_module_count() == 0 From 4a11ccee497a3e266c38eb6e9279f75cc61cfb9d Mon Sep 17 00:00:00 2001 From: SunilKuravinakop <98882378+SunilKuravinakop@users.noreply.github.com> Date: Mon, 15 Sep 2025 10:40:37 +0530 Subject: [PATCH 293/734] [Clang][OpenMP]Default clause variable category (#157063) Support for Variable Category in Default Clause. --------- Co-authored-by: Sunil Kuravinakop --- clang/docs/OpenMPSupport.rst | 2 +- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/AST/OpenMPClause.h | 22 ++- .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/include/clang/Basic/OpenMPKinds.def | 11 ++ clang/include/clang/Basic/OpenMPKinds.h | 11 ++ clang/include/clang/Sema/SemaOpenMP.h | 10 +- clang/lib/AST/OpenMPClause.cpp | 9 +- clang/lib/Basic/OpenMPKinds.cpp | 21 ++- clang/lib/Parse/ParseOpenMP.cpp | 40 +++++- clang/lib/Sema/SemaOpenMP.cpp | 125 +++++++++++++++--- clang/lib/Sema/TreeTransform.h | 9 +- clang/lib/Serialization/ASTReader.cpp | 3 + clang/lib/Serialization/ASTWriter.cpp | 2 + clang/test/OpenMP/parallel_ast_print.cpp | 42 ++++++ .../test/OpenMP/parallel_default_messages.cpp | 18 +++ 16 files changed, 295 insertions(+), 33 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 47a8109abb21c..0bc8590815220 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -576,7 +576,7 @@ implementation. 
| | | | Flang parser: https://github.com/llvm/llvm-project/pull/153807 | | | | | Flang sema: https://github.com/llvm/llvm-project/pull/154779 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| variable-category on default clause | :part:`In Progress` | :none:`unclaimed` | | +| variable-category on default clause | :good:`done` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Changes to omp_target_is_accessible | :part:`In Progress` | :part:`In Progress` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 873d63f56480c..41bec2666f939 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -521,6 +521,7 @@ OpenMP Support - Allow array length to be omitted in array section subscript expression. - Fixed non-contiguous strided update in the ``omp target update`` directive with the ``from`` clause. - Properly handle array section/assumed-size array privatization in C/C++. +- Added support for ``variable-category`` modifier in ``default clause``. Improvements ^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 72effbc3e02fc..b2a6d4b9182b0 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1269,6 +1269,12 @@ class OMPDefaultClause : public OMPClause { /// Start location of the kind in source code. 
SourceLocation KindKwLoc; + /// Variable-Category to indicate where Kind is applied + OpenMPDefaultClauseVariableCategory VC = OMPC_DEFAULT_VC_all; + + /// Start location of Variable-Category + SourceLocation VCLoc; + /// Set kind of the clauses. /// /// \param K Argument of clause. @@ -1279,6 +1285,15 @@ class OMPDefaultClause : public OMPClause { /// \param KLoc Argument location. void setDefaultKindKwLoc(SourceLocation KLoc) { KindKwLoc = KLoc; } + /// Set Variable Category used with the Kind Clause (Default Modifier) + void setDefaultVariableCategory(OpenMPDefaultClauseVariableCategory VC) { + this->VC = VC; + } + + void setDefaultVariableCategoryLocation(SourceLocation VCLoc) { + this->VCLoc = VCLoc; + } + public: /// Build 'default' clause with argument \a A ('none' or 'shared'). /// @@ -1288,10 +1303,11 @@ class OMPDefaultClause : public OMPClause { /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. OMPDefaultClause(llvm::omp::DefaultKind A, SourceLocation ALoc, + OpenMPDefaultClauseVariableCategory VC, SourceLocation VCLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) : OMPClause(llvm::omp::OMPC_default, StartLoc, EndLoc), - LParenLoc(LParenLoc), Kind(A), KindKwLoc(ALoc) {} + LParenLoc(LParenLoc), Kind(A), KindKwLoc(ALoc), VC(VC), VCLoc(VCLoc) {} /// Build an empty clause. OMPDefaultClause() @@ -1310,6 +1326,10 @@ class OMPDefaultClause : public OMPClause { /// Returns location of clause kind. 
SourceLocation getDefaultKindKwLoc() const { return KindKwLoc; } + OpenMPDefaultClauseVariableCategory getDefaultVC() const { return VC; } + + SourceLocation getDefaultVCLoc() const { return VCLoc; } + child_range children() { return child_range(child_iterator(), child_iterator()); } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index b0e669cd3560d..757404a3f5eac 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11714,6 +11714,8 @@ def note_omp_default_dsa_none : Note< "explicit data sharing attribute requested here">; def note_omp_defaultmap_attr_none : Note< "explicit data sharing attribute, data mapping attribute, or is_device_ptr clause requested here">; +def err_omp_default_vc : Error< + "wrong variable category specified with modifier %0 in the default clause">; def err_omp_wrong_dsa : Error< "%0 variable cannot be %1">; def err_omp_variably_modified_type_not_supported : Error< diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index 9d6f816eea91f..79c11b851c557 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -35,6 +35,9 @@ #ifndef OPENMP_DIST_SCHEDULE_KIND #define OPENMP_DIST_SCHEDULE_KIND(Name) #endif +#ifndef OPENMP_DEFAULT_VARIABLE_CATEGORY +#define OPENMP_DEFAULT_VARIABLE_CATEGORY(Name) +#endif #ifndef OPENMP_DEFAULTMAP_KIND #define OPENMP_DEFAULTMAP_KIND(Name) #endif @@ -112,6 +115,13 @@ OPENMP_SCHEDULE_MODIFIER(simd) OPENMP_DEVICE_MODIFIER(ancestor) OPENMP_DEVICE_MODIFIER(device_num) +// Variable-category attributes for 'default' clause. +OPENMP_DEFAULT_VARIABLE_CATEGORY(aggregate) +OPENMP_DEFAULT_VARIABLE_CATEGORY(all) +OPENMP_DEFAULT_VARIABLE_CATEGORY(allocatable) +OPENMP_DEFAULT_VARIABLE_CATEGORY(pointer) +OPENMP_DEFAULT_VARIABLE_CATEGORY(scalar) + // Static attributes for 'defaultmap' clause. 
OPENMP_DEFAULTMAP_KIND(scalar) OPENMP_DEFAULTMAP_KIND(aggregate) @@ -267,6 +277,7 @@ OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) #undef OPENMP_MAP_MODIFIER_KIND #undef OPENMP_MOTION_MODIFIER_KIND #undef OPENMP_DIST_SCHEDULE_KIND +#undef OPENMP_DEFAULT_VARIABLE_CATEGORY #undef OPENMP_DEFAULTMAP_KIND #undef OPENMP_DEFAULTMAP_MODIFIER #undef OPENMP_DOACROSS_MODIFIER diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index d3285cd9c6a14..115af7b19d6e4 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -107,6 +107,13 @@ enum OpenMPDistScheduleClauseKind { OMPC_DIST_SCHEDULE_unknown }; +/// OpenMP variable-category for 'default' clause. +enum OpenMPDefaultClauseVariableCategory { +#define OPENMP_DEFAULT_VARIABLE_CATEGORY(Name) OMPC_DEFAULT_VC_##Name, +#include "clang/Basic/OpenMPKinds.def" + OMPC_DEFAULT_VC_unknown +}; + /// OpenMP attributes for 'defaultmap' clause. enum OpenMPDefaultmapClauseKind { #define OPENMP_DEFAULTMAP_KIND(Name) \ @@ -257,6 +264,10 @@ struct OMPInteropInfo final { llvm::SmallVector PreferTypes; }; +OpenMPDefaultClauseVariableCategory +getOpenMPDefaultVariableCategory(StringRef Str, const LangOptions &LangOpts); +const char *getOpenMPDefaultVariableCategoryName(unsigned VC); + unsigned getOpenMPSimpleClauseType(OpenMPClauseKind Kind, llvm::StringRef Str, const LangOptions &LangOpts); const char *getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, unsigned Type); diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 91c3d4bd5210e..23827051ed724 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -951,11 +951,11 @@ class SemaOpenMP : public SemaBase { SourceLocation LParenLoc, SourceLocation EndLoc); /// Called on well-formed 'default' clause. 
- OMPClause *ActOnOpenMPDefaultClause(llvm::omp::DefaultKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); + OMPClause * + ActOnOpenMPDefaultClause(llvm::omp::DefaultKind M, SourceLocation MLoc, + OpenMPDefaultClauseVariableCategory VCKind, + SourceLocation VCKindLoc, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc); /// Called on well-formed 'proc_bind' clause. OMPClause *ActOnOpenMPProcBindClause(llvm::omp::ProcBindKind Kind, SourceLocation KindLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 0930ca27c29f8..69d33019c0952 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1911,8 +1911,13 @@ void OMPClausePrinter::VisitOMPDetachClause(OMPDetachClause *Node) { void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) { OS << "default(" << getOpenMPSimpleClauseTypeName(OMPC_default, - unsigned(Node->getDefaultKind())) - << ")"; + unsigned(Node->getDefaultKind())); + if (Version >= 60 && Node->getDefaultVC() != OMPC_DEFAULT_VC_all) { + OS << ":" + << getOpenMPDefaultVariableCategoryName(unsigned(Node->getDefaultVC())); + } + + OS << ")"; } void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) { diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 3f8f64df8702e..3255da8548cbc 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -20,6 +20,26 @@ using namespace clang; using namespace llvm::omp; +OpenMPDefaultClauseVariableCategory +clang::getOpenMPDefaultVariableCategory(StringRef Str, + const LangOptions &LangOpts) { + return llvm::StringSwitch(Str) +#define OPENMP_DEFAULT_VARIABLE_CATEGORY(Name) \ + .Case(#Name, OMPC_DEFAULT_VC_##Name) +#include "clang/Basic/OpenMPKinds.def" + .Default(OMPC_DEFAULT_VC_unknown); +} + +const char *clang::getOpenMPDefaultVariableCategoryName(unsigned VC) { + switch (VC) { +#define 
OPENMP_DEFAULT_VARIABLE_CATEGORY(Name) \ + case OMPC_DEFAULT_VC_##Name: \ + return #Name; +#include "clang/Basic/OpenMPKinds.def" + } + llvm_unreachable("Invalid Variable Category in the default clause"); +} + unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, const LangOptions &LangOpts) { switch (Kind) { @@ -902,4 +922,3 @@ bool clang::checkFailClauseParameter(OpenMPClauseKind FailClauseParameter) { FailClauseParameter == llvm::omp::OMPC_relaxed || FailClauseParameter == llvm::omp::OMPC_seq_cst; } - diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 5db2f2e2ccf86..7dceb2d208352 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3083,7 +3083,6 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); break; case OMPC_fail: - case OMPC_default: case OMPC_proc_bind: case OMPC_atomic_default_mem_order: case OMPC_at: @@ -3115,6 +3114,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_schedule: case OMPC_dist_schedule: case OMPC_defaultmap: + case OMPC_default: case OMPC_order: // OpenMP [2.7.1, Restrictions, p. 3] // Only one schedule clause can appear on a loop directive. @@ -3734,6 +3734,32 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind, ConsumeAnyToken(); if (Arg.back() == OMPC_DIST_SCHEDULE_static && Tok.is(tok::comma)) DelimLoc = ConsumeAnyToken(); + } else if (Kind == OMPC_default) { + // Get a default modifier + unsigned Modifier = getOpenMPSimpleClauseType( + Kind, Tok.isAnnotation() ? 
"" : PP.getSpelling(Tok), getLangOpts()); + + Arg.push_back(Modifier); + KLoc.push_back(Tok.getLocation()); + if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) && + Tok.isNot(tok::annot_pragma_openmp_end)) + ConsumeAnyToken(); + // Parse ':' + if (Tok.is(tok::colon) && getLangOpts().OpenMP >= 60) { + ConsumeAnyToken(); + // Get a variable-category attribute for default clause modifier + OpenMPDefaultClauseVariableCategory VariableCategory = + getOpenMPDefaultVariableCategory( + Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts()); + Arg.push_back(VariableCategory); + KLoc.push_back(Tok.getLocation()); + if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) && + Tok.isNot(tok::annot_pragma_openmp_end)) + ConsumeAnyToken(); + } else { + Arg.push_back(OMPC_DEFAULT_VC_all); + KLoc.push_back(SourceLocation()); + } } else if (Kind == OMPC_defaultmap) { // Get a defaultmap modifier unsigned Modifier = getOpenMPSimpleClauseType( @@ -3932,6 +3958,18 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind, if (NeedAnExpression && Val.isInvalid()) return nullptr; + if (Kind == OMPC_default && getLangOpts().OpenMP < 51 && Arg[0] && + (static_cast(Arg[0]) == OMP_DEFAULT_private || + static_cast(Arg[0]) == OMP_DEFAULT_firstprivate)) { + Diag(KLoc[0], diag::err_omp_invalid_dsa) + << getOpenMPClauseName(static_cast(Arg[0]) == + OMP_DEFAULT_private + ? OMPC_private + : OMPC_firstprivate) + << getOpenMPClauseName(OMPC_default) << "5.1"; + return nullptr; + } + if (ParseOnly) return nullptr; return Actions.OpenMP().ActOnOpenMPSingleExprWithArgClause( diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 63a56a6583efc..66bdd6e72d827 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -73,6 +73,18 @@ enum DefaultDataSharingAttributes { DSA_firstprivate = 1 << 3, /// Default data sharing attribute 'firstprivate'. 
}; +/// Variable Category attributes to restrict the modifier of the +/// default clause (DefaultDataSharingAttributes) +/// Not mentioning any Variable category attribute indicates +/// the modifier (DefaultDataSharingAttributes) is for all variables. +enum DefaultDataSharingVCAttributes { + DSA_VC_all = 0, /// for all variables. + DSA_VC_aggregate, /// for aggregate variables. + DSA_VC_allocatable, /// for allocatable variables. + DSA_VC_pointer, /// for pointer variables. + DSA_VC_scalar, /// for scalar variables. +}; + /// Stack for tracking declarations used in OpenMP directives and /// clauses and their data-sharing attributes. class DSAStackTy { @@ -168,6 +180,8 @@ class DSAStackTy { LoopControlVariablesMapTy LCVMap; DefaultDataSharingAttributes DefaultAttr = DSA_unspecified; SourceLocation DefaultAttrLoc; + DefaultDataSharingVCAttributes DefaultVCAttr = DSA_VC_all; + SourceLocation DefaultAttrVCLoc; DefaultmapInfo DefaultmapMap[OMPC_DEFAULTMAP_unknown + 1]; OpenMPDirectiveKind Directive = OMPD_unknown; DeclarationNameInfo DirectiveName; @@ -735,6 +749,31 @@ class DSAStackTy { getTopOfStack().DefaultAttr = DSA_firstprivate; getTopOfStack().DefaultAttrLoc = Loc; } + /// Set default data sharing variable category attribute to aggregate. + void setDefaultDSAVCAggregate(SourceLocation VCLoc) { + getTopOfStack().DefaultVCAttr = DSA_VC_aggregate; + getTopOfStack().DefaultAttrVCLoc = VCLoc; + } + /// Set default data sharing variable category attribute to all. + void setDefaultDSAVCAll(SourceLocation VCLoc) { + getTopOfStack().DefaultVCAttr = DSA_VC_all; + getTopOfStack().DefaultAttrVCLoc = VCLoc; + } + /// Set default data sharing variable category attribute to allocatable. + void setDefaultDSAVCAllocatable(SourceLocation VCLoc) { + getTopOfStack().DefaultVCAttr = DSA_VC_allocatable; + getTopOfStack().DefaultAttrVCLoc = VCLoc; + } + /// Set default data sharing variable category attribute to pointer. 
+ void setDefaultDSAVCPointer(SourceLocation VCLoc) { + getTopOfStack().DefaultVCAttr = DSA_VC_pointer; + getTopOfStack().DefaultAttrVCLoc = VCLoc; + } + /// Set default data sharing variable category attribute to scalar. + void setDefaultDSAVCScalar(SourceLocation VCLoc) { + getTopOfStack().DefaultVCAttr = DSA_VC_scalar; + getTopOfStack().DefaultAttrVCLoc = VCLoc; + } /// Set default data mapping attribute to Modifier:Kind void setDefaultDMAAttr(OpenMPDefaultmapClauseModifier M, OpenMPDefaultmapClauseKind Kind, SourceLocation Loc) { @@ -1326,11 +1365,34 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, return DVar; } + DefaultDataSharingAttributes IterDA = Iter->DefaultAttr; + switch (Iter->DefaultVCAttr) { + case DSA_VC_aggregate: + if (!VD->getType()->isAggregateType()) + IterDA = DSA_none; + break; + case DSA_VC_allocatable: + if (!(VD->getType()->isPointerType() || + VD->getType()->isVariableArrayType())) + IterDA = DSA_none; + break; + case DSA_VC_pointer: + if (!VD->getType()->isPointerType()) + IterDA = DSA_none; + break; + case DSA_VC_scalar: + if (!VD->getType()->isScalarType()) + IterDA = DSA_none; + break; + case DSA_VC_all: + break; + } + // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced // in a Construct, C/C++, implicitly determined, p.1] // In a parallel or task construct, the data-sharing attributes of these // variables are determined by the default clause, if present. 
- switch (Iter->DefaultAttr) { + switch (IterDA) { case DSA_shared: DVar.CKind = OMPC_shared; DVar.ImplicitDSALoc = Iter->DefaultAttrLoc; @@ -16265,10 +16327,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { OMPClause *Res = nullptr; switch (Kind) { - case OMPC_default: - Res = ActOnOpenMPDefaultClause(static_cast(Argument), - ArgumentLoc, StartLoc, LParenLoc, EndLoc); - break; case OMPC_proc_bind: Res = ActOnOpenMPProcBindClause(static_cast(Argument), ArgumentLoc, StartLoc, LParenLoc, EndLoc); @@ -16349,6 +16407,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( case OMPC_num_tasks: case OMPC_hint: case OMPC_dist_schedule: + case OMPC_default: case OMPC_defaultmap: case OMPC_unknown: case OMPC_uniform: @@ -16382,38 +16441,58 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( return Res; } -OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause(DefaultKind Kind, - SourceLocation KindKwLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { - if (Kind == OMP_DEFAULT_unknown) { - Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) +OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause( + llvm::omp::DefaultKind M, SourceLocation MLoc, + OpenMPDefaultClauseVariableCategory VCKind, SourceLocation VCKindLoc, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { + if (M == OMP_DEFAULT_unknown) { + Diag(MLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_default, /*First=*/0, /*Last=*/unsigned(OMP_DEFAULT_unknown)) << getOpenMPClauseNameForDiag(OMPC_default); return nullptr; } - switch (Kind) { + switch (M) { case OMP_DEFAULT_none: - DSAStack->setDefaultDSANone(KindKwLoc); + DSAStack->setDefaultDSANone(MLoc); break; case OMP_DEFAULT_shared: - DSAStack->setDefaultDSAShared(KindKwLoc); + DSAStack->setDefaultDSAShared(MLoc); break; case OMP_DEFAULT_firstprivate: - DSAStack->setDefaultDSAFirstPrivate(KindKwLoc); + 
DSAStack->setDefaultDSAFirstPrivate(MLoc); break; case OMP_DEFAULT_private: - DSAStack->setDefaultDSAPrivate(KindKwLoc); + DSAStack->setDefaultDSAPrivate(MLoc); break; default: llvm_unreachable("DSA unexpected in OpenMP default clause"); } + switch (VCKind) { + case OMPC_DEFAULT_VC_aggregate: + DSAStack->setDefaultDSAVCAggregate(VCKindLoc); + break; + case OMPC_DEFAULT_VC_all: + DSAStack->setDefaultDSAVCAll(VCKindLoc); + break; + case OMPC_DEFAULT_VC_allocatable: + DSAStack->setDefaultDSAVCAllocatable(VCKindLoc); + break; + case OMPC_DEFAULT_VC_pointer: + DSAStack->setDefaultDSAVCPointer(VCKindLoc); + break; + case OMPC_DEFAULT_VC_scalar: + DSAStack->setDefaultDSAVCScalar(VCKindLoc); + break; + default: + Diag(VCKindLoc, diag::err_omp_default_vc) + << getOpenMPSimpleClauseTypeName(OMPC_default, unsigned(M)); + } + return new (getASTContext()) - OMPDefaultClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); + OMPDefaultClause(M, MLoc, VCKind, VCKindLoc, StartLoc, LParenLoc, EndLoc); } OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind, @@ -16742,6 +16821,15 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprWithArgClause( static_cast(Argument.back()), Expr, StartLoc, LParenLoc, ArgumentLoc.back(), DelimLoc, EndLoc); break; + case OMPC_default: + enum { DefaultModifier, DefaultVarCategory }; + Res = ActOnOpenMPDefaultClause( + static_cast(Argument[DefaultModifier]), + ArgumentLoc[DefaultModifier], + static_cast( + Argument[DefaultVarCategory]), + ArgumentLoc[DefaultVarCategory], StartLoc, LParenLoc, EndLoc); + break; case OMPC_defaultmap: enum { Modifier, DefaultmapKind }; Res = ActOnOpenMPDefaultmapClause( @@ -16790,7 +16878,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprWithArgClause( case OMPC_sizes: case OMPC_allocator: case OMPC_collapse: - case OMPC_default: case OMPC_proc_bind: case OMPC_private: case OMPC_firstprivate: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 6136937210978..242ffb09af006 100644 --- 
a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1810,11 +1810,13 @@ class TreeTransform { /// By default, performs semantic analysis to build the new OpenMP clause. /// Subclasses may override this routine to provide different behavior. OMPClause *RebuildOMPDefaultClause(DefaultKind Kind, SourceLocation KindKwLoc, + OpenMPDefaultClauseVariableCategory VCKind, + SourceLocation VCLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { return getSema().OpenMP().ActOnOpenMPDefaultClause( - Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); + Kind, KindKwLoc, VCKind, VCLoc, StartLoc, LParenLoc, EndLoc); } /// Build a new OpenMP 'proc_bind' clause. @@ -10512,8 +10514,9 @@ template OMPClause * TreeTransform::TransformOMPDefaultClause(OMPDefaultClause *C) { return getDerived().RebuildOMPDefaultClause( - C->getDefaultKind(), C->getDefaultKindKwLoc(), C->getBeginLoc(), - C->getLParenLoc(), C->getEndLoc()); + C->getDefaultKind(), C->getDefaultKindKwLoc(), C->getDefaultVC(), + C->getDefaultVCLoc(), C->getBeginLoc(), C->getLParenLoc(), + C->getEndLoc()); } template diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 5f40e94074702..9ee8a0fb0f060 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11632,6 +11632,9 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultKind(static_cast(Record.readInt())); C->setLParenLoc(Record.readSourceLocation()); C->setDefaultKindKwLoc(Record.readSourceLocation()); + C->setDefaultVariableCategory( + Record.readEnum()); + C->setDefaultVariableCategoryLocation(Record.readSourceLocation()); } void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 15a3ed4c427f8..2aa77934c08d1 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ 
-7875,6 +7875,8 @@ void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) { Record.push_back(unsigned(C->getDefaultKind())); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getDefaultKindKwLoc()); + Record.push_back(unsigned(C->getDefaultVC())); + Record.AddSourceLocation(C->getDefaultVCLoc()); } void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) { diff --git a/clang/test/OpenMP/parallel_ast_print.cpp b/clang/test/OpenMP/parallel_ast_print.cpp index 15439ea31215a..28dc611bf864d 100644 --- a/clang/test/OpenMP/parallel_ast_print.cpp +++ b/clang/test/OpenMP/parallel_ast_print.cpp @@ -21,11 +21,19 @@ // RUN: %clang_cc1 -DOMP60 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -ast-print %s | FileCheck -check-prefixes=CHECK,OMP60 %s // RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck -check-prefixes=CHECK,OMP60 %s +// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -verify -Wno-vla %s -ast-dump | FileCheck -check-prefixes=OMP60_DUMP %s // expected-no-diagnostics #ifndef HEADER #define HEADER +#ifdef OMP60 +int global; +int global2; + +void bar(int j) { }; +#endif + void foo() {} struct S1 { @@ -185,6 +193,32 @@ T tmain(T argc, T *argv) { return 0; } + +#ifdef OMP60 +// OMP60_DUMP: FunctionDecl {{.*}}mainVC {{.*}} +// OMP60_DUMP: OMPParallelDirective {{.*}} +// OMP60_DUMP-NEXT: OMPSharedClause{{.*}} +// OMP60_DUMP-NEXT: {{.*}}DeclRefExpr{{.*}} 'global' 'int'{{.*}} +// OMP60_DUMP-NEXT: OMPDefaultClause {{.*}} +// OMP60_DUMP-NEXT: OMPFirstprivateClause{{.*}} +// OMP60_DUMP-NEXT: {{.*}}DeclRefExpr{{.*}} 'h' 'int[20]'{{.*}} +// OMP60_DUMP: OMPParallelDirective {{.*}} +// OMP60_DUMP-NEXT: OMPPrivateClause{{.*}} +// OMP60_DUMP-NEXT: {{.*}}DeclRefExpr{{.*}} 'global2' 'int'{{.*}} +// OMP60_DUMP-NEXT: OMPDefaultClause {{.*}} +// 
OMP60_DUMP-NEXT: OMPPrivateClause {{.*}} +// OMP60_DUMP-NEXT: {{.*}}DeclRefExpr{{.*}} 'j' 'int'{{.*}} +int mainVC(int argc, int *argv) { + int h[20]; + int j; +#pragma omp parallel shared(global) default(firstprivate: aggregate) + bar(h[1]), h[1] = global; +#pragma omp parallel private(global2) default(private: scalar) + bar(global2), j = global2; + return 0; +} +#endif + // CHECK: template T tmain(T argc, T *argv) { // CHECK-NEXT: T b = argc, c, d, e, f, g; // CHECK-NEXT: static T a; @@ -237,6 +271,14 @@ T tmain(T argc, T *argv) { // OMP60-NEXT: #pragma omp parallel if(1) num_threads(strict: s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(default, &&: g) reduction(task, +: argc) message("msg") severity(warning) // OMP60-NEXT: foo() +// OMP60: int mainVC(int argc, int *argv) { +// OMP60-NEXT: int h[20]; +// OMP60-NEXT: int j; +// OMP60-NEXT: #pragma omp parallel shared(global) default(firstprivate:aggregate) +// OMP60-NEXT: bar(h[1]) , h[1] = global; +// OMP60-NEXT: #pragma omp parallel private(global2) default(private:scalar) +// OMP60-NEXT: bar(global2) , j = global2; + enum Enum { }; int main (int argc, char **argv) { diff --git a/clang/test/OpenMP/parallel_default_messages.cpp b/clang/test/OpenMP/parallel_default_messages.cpp index 37d3b5565f83c..842b1ac5a96b8 100644 --- a/clang/test/OpenMP/parallel_default_messages.cpp +++ b/clang/test/OpenMP/parallel_default_messages.cpp @@ -6,6 +6,7 @@ // RUN: %clang_cc1 -verify -fopenmp-version=30 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,ge40 -fopenmp -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-simd -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-version=60 -fopenmp -DOMP60 -ferror-limit 100 -o - %s -Wuninitialized void foo(); @@ -47,6 +48,23 @@ int main(int argc, char **argv) { } #endif +#ifdef OMP60 +#pragma omp parallel default(shared:) private(x,y) // 
expected-error {{wrong variable category specified with modifier shared in the default clause}} + { + ++x; + ++y; + } +#pragma omp parallel default(shared: junk) private(x,y) // expected-error {{wrong variable category specified with modifier shared in the default clause}} + { + ++x; + ++y; + } +#pragma omp parallel default(firstprivate: junk) private(x,y) // expected-error {{wrong variable category specified with modifier firstprivate in the default clause}} + { + ++x; + ++y; + } +#endif return 0; } From 9d5556377c6e716b94747ad1db089dad19890f4e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 14 Sep 2025 22:25:15 -0700 Subject: [PATCH 294/734] [RISCV][GISel] Remove unnecessary copy from X0 in G_FCONSTANT selection. (#158429) Instead of calling materializeImm, just assign GPRReg to X0. While there, move conversion to APInt to only where it is necessary. --- .../RISCV/GISel/RISCVInstructionSelector.cpp | 16 +++++++++++----- .../instruction-select/fp-constant-f16.mir | 3 +-- .../instruction-select/fp-constant.mir | 6 ++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 7df1b7e580002..4330d4e91e0ee 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -738,12 +738,17 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { // TODO: Use constant pool for complex constants. 
Register DstReg = MI.getOperand(0).getReg(); const APFloat &FPimm = MI.getOperand(1).getFPImm()->getValueAPF(); - APInt Imm = FPimm.bitcastToAPInt(); unsigned Size = MRI->getType(DstReg).getSizeInBits(); if (Size == 16 || Size == 32 || (Size == 64 && Subtarget->is64Bit())) { - Register GPRReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); - if (!materializeImm(GPRReg, Imm.getSExtValue(), MIB)) - return false; + Register GPRReg; + if (FPimm.isPosZero()) { + GPRReg = RISCV::X0; + } else { + GPRReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); + APInt Imm = FPimm.bitcastToAPInt(); + if (!materializeImm(GPRReg, Imm.getSExtValue(), MIB)) + return false; + } unsigned Opcode = Size == 64 ? RISCV::FMV_D_X : Size == 32 ? RISCV::FMV_W_X @@ -756,7 +761,7 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { assert(Size == 64 && !Subtarget->is64Bit() && "Unexpected size or subtarget"); - if (Imm.isNonNegative() && Imm.isZero()) { + if (FPimm.isPosZero()) { // Optimize +0.0 to use fcvt.d.w MachineInstrBuilder FCVT = MIB.buildInstr(RISCV::FCVT_D_W, {DstReg}, {Register(RISCV::X0)}) @@ -771,6 +776,7 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { // Split into two pieces and build through the stack. 
Register GPRRegHigh = MRI->createVirtualRegister(&RISCV::GPRRegClass); Register GPRRegLow = MRI->createVirtualRegister(&RISCV::GPRRegClass); + APInt Imm = FPimm.bitcastToAPInt(); if (!materializeImm(GPRRegHigh, Imm.extractBits(32, 32).getSExtValue(), MIB)) return false; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant-f16.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant-f16.mir index 3028b6476e20b..a688153d44be5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant-f16.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant-f16.mir @@ -57,8 +57,7 @@ body: | ; CHECK-LABEL: name: half_positive_zero ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 - ; CHECK-NEXT: [[FMV_H_X:%[0-9]+]]:fpr16 = FMV_H_X [[COPY]] + ; CHECK-NEXT: [[FMV_H_X:%[0-9]+]]:fpr16 = FMV_H_X $x0 ; CHECK-NEXT: $f10_h = COPY [[FMV_H_X]] ; CHECK-NEXT: PseudoRET implicit $f10_h %1:fprb(s16) = G_FCONSTANT half 0.000000e+00 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant.mir index 4db80c6c1141f..7dde7771f161b 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-constant.mir @@ -56,8 +56,7 @@ body: | ; CHECK-LABEL: name: float_positive_zero ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 - ; CHECK-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] + ; CHECK-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X $x0 ; CHECK-NEXT: $f10_f = COPY [[FMV_W_X]] ; CHECK-NEXT: PseudoRET implicit $f10_f %1:fprb(s32) = G_FCONSTANT float 0.000000e+00 @@ -171,8 +170,7 @@ body: | ; RV64-LABEL: name: double_positive_zero ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} - ; RV64-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 - ; RV64-NEXT: [[FMV_D_X:%[0-9]+]]:fpr64 = FMV_D_X 
[[COPY]] + ; RV64-NEXT: [[FMV_D_X:%[0-9]+]]:fpr64 = FMV_D_X $x0 ; RV64-NEXT: $f10_d = COPY [[FMV_D_X]] ; RV64-NEXT: PseudoRET implicit $f10_d %1:fprb(s64) = G_FCONSTANT double 0.000000e+00 From 03c356c5544fad1890cc9abd0c1a9b7bab6f2a6b Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 15 Sep 2025 07:39:11 +0200 Subject: [PATCH 295/734] [clang][bytecode] Pass initializer along in `evaluateAsInitializer()` (#158056) We just called `getInit()`, which isn't always correct and used the wrong initializer in the module test case. --- clang/lib/AST/ByteCode/ByteCodeEmitter.h | 3 ++- clang/lib/AST/ByteCode/Compiler.cpp | 21 ++++++++++----------- clang/lib/AST/ByteCode/Compiler.h | 6 ++++-- clang/lib/AST/ByteCode/Context.cpp | 4 ++-- clang/lib/AST/ByteCode/Context.h | 3 ++- clang/lib/AST/ByteCode/EvalEmitter.cpp | 7 ++++--- clang/lib/AST/ByteCode/EvalEmitter.h | 6 ++++-- clang/lib/AST/ExprConstant.cpp | 2 +- clang/test/Modules/added-visible-decls.cppm | 1 + 9 files changed, 30 insertions(+), 23 deletions(-) diff --git a/clang/lib/AST/ByteCode/ByteCodeEmitter.h b/clang/lib/AST/ByteCode/ByteCodeEmitter.h index d29db66325412..c050b299d8f61 100644 --- a/clang/lib/AST/ByteCode/ByteCodeEmitter.h +++ b/clang/lib/AST/ByteCode/ByteCodeEmitter.h @@ -46,7 +46,8 @@ class ByteCodeEmitter { /// Methods implemented by the compiler. 
virtual bool visitFunc(const FunctionDecl *E) = 0; virtual bool visitExpr(const Expr *E, bool DestroyToplevelScope) = 0; - virtual bool visitDeclAndReturn(const VarDecl *E, bool ConstantContext) = 0; + virtual bool visitDeclAndReturn(const VarDecl *VD, const Expr *Init, + bool ConstantContext) = 0; virtual bool visit(const Expr *E) = 0; virtual bool emitBool(bool V, const Expr *E) = 0; diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 3f7db39281358..78b74acc3789d 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -4714,7 +4714,8 @@ template VarCreationState Compiler::visitDecl(const VarDecl *VD, bool IsConstexprUnknown) { - auto R = this->visitVarDecl(VD, /*Toplevel=*/true, IsConstexprUnknown); + auto R = this->visitVarDecl(VD, VD->getInit(), /*Toplevel=*/true, + IsConstexprUnknown); if (R.notCreated()) return R; @@ -4740,14 +4741,12 @@ VarCreationState Compiler::visitDecl(const VarDecl *VD, /// We get here from evaluateAsInitializer(). /// We need to evaluate the initializer and return its value. template -bool Compiler::visitDeclAndReturn(const VarDecl *VD, +bool Compiler::visitDeclAndReturn(const VarDecl *VD, const Expr *Init, bool ConstantContext) { - // We only create variables if we're evaluating in a constant context. // Otherwise, just evaluate the initializer and return it. 
if (!ConstantContext) { DeclScope LS(this, VD); - const Expr *Init = VD->getInit(); if (!this->visit(Init)) return false; return this->emitRet(classify(Init).value_or(PT_Ptr), VD) && @@ -4755,7 +4754,7 @@ bool Compiler::visitDeclAndReturn(const VarDecl *VD, } LocalScope VDScope(this, VD); - if (!this->visitVarDecl(VD, /*Toplevel=*/true)) + if (!this->visitVarDecl(VD, Init, /*Toplevel=*/true)) return false; OptPrimType VarT = classify(VD->getType()); @@ -4802,9 +4801,9 @@ bool Compiler::visitDeclAndReturn(const VarDecl *VD, } template -VarCreationState Compiler::visitVarDecl(const VarDecl *VD, - bool Toplevel, - bool IsConstexprUnknown) { +VarCreationState +Compiler::visitVarDecl(const VarDecl *VD, const Expr *Init, + bool Toplevel, bool IsConstexprUnknown) { // We don't know what to do with these, so just return false. if (VD->getType().isNull()) return false; @@ -4814,7 +4813,6 @@ VarCreationState Compiler::visitVarDecl(const VarDecl *VD, if (!this->isActive()) return VarCreationState::NotCreated(); - const Expr *Init = VD->getInit(); OptPrimType VarT = classify(VD->getType()); if (Init && Init->isValueDependent()) @@ -5488,7 +5486,8 @@ template bool Compiler::maybeEmitDeferredVarInit(const VarDecl *VD) { if (auto *DD = dyn_cast_if_present(VD)) { for (auto *BD : DD->flat_bindings()) - if (auto *KD = BD->getHoldingVar(); KD && !this->visitVarDecl(KD)) + if (auto *KD = BD->getHoldingVar(); + KD && !this->visitVarDecl(KD, KD->getInit())) return false; } return true; @@ -5552,7 +5551,7 @@ bool Compiler::visitDeclStmt(const DeclStmt *DS, const auto *VD = dyn_cast(D); if (!VD) return false; - if (!this->visitVarDecl(VD)) + if (!this->visitVarDecl(VD, VD->getInit())) return false; // Register decomposition decl holding vars. 
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index c97dc18656ce4..5f392964c076a 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -251,7 +251,8 @@ class Compiler : public ConstStmtVisitor, bool>, bool visitExpr(const Expr *E, bool DestroyToplevelScope) override; bool visitFunc(const FunctionDecl *F) override; - bool visitDeclAndReturn(const VarDecl *VD, bool ConstantContext) override; + bool visitDeclAndReturn(const VarDecl *VD, const Expr *Init, + bool ConstantContext) override; protected: /// Emits scope cleanup instructions. @@ -303,7 +304,8 @@ class Compiler : public ConstStmtVisitor, bool>, /// intact. bool delegate(const Expr *E); /// Creates and initializes a variable from the given decl. - VarCreationState visitVarDecl(const VarDecl *VD, bool Toplevel = false, + VarCreationState visitVarDecl(const VarDecl *VD, const Expr *Init, + bool Toplevel = false, bool IsConstexprUnknown = false); VarCreationState visitDecl(const VarDecl *VD, bool IsConstexprUnknown = false); diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index 8598996681466..6e6c60925a70f 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -126,7 +126,7 @@ bool Context::evaluate(State &Parent, const Expr *E, APValue &Result, } bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD, - APValue &Result) { + const Expr *Init, APValue &Result) { ++EvalID; bool Recursing = !Stk.empty(); size_t StackSizeBefore = Stk.size(); @@ -135,7 +135,7 @@ bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD, bool CheckGlobalInitialized = shouldBeGloballyIndexed(VD) && (VD->getType()->isRecordType() || VD->getType()->isArrayType()); - auto Res = C.interpretDecl(VD, CheckGlobalInitialized); + auto Res = C.interpretDecl(VD, Init, CheckGlobalInitialized); if (Res.isInvalid()) { C.cleanup(); Stk.clearTo(StackSizeBefore); diff --git 
a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h index fa98498dbe8fa..280a31725555f 100644 --- a/clang/lib/AST/ByteCode/Context.h +++ b/clang/lib/AST/ByteCode/Context.h @@ -59,7 +59,8 @@ class Context final { ConstantExprKind Kind); /// Evaluates a toplevel initializer. - bool evaluateAsInitializer(State &Parent, const VarDecl *VD, APValue &Result); + bool evaluateAsInitializer(State &Parent, const VarDecl *VD, const Expr *Init, + APValue &Result); bool evaluateCharRange(State &Parent, const Expr *SizeExpr, const Expr *PtrExpr, APValue &Result); diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp index e349397078aa3..1d73f0e247aa2 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.cpp +++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp @@ -49,14 +49,15 @@ EvaluationResult EvalEmitter::interpretExpr(const Expr *E, return std::move(this->EvalResult); } -EvaluationResult EvalEmitter::interpretDecl(const VarDecl *VD, +EvaluationResult EvalEmitter::interpretDecl(const VarDecl *VD, const Expr *Init, bool CheckFullyInitialized) { this->CheckFullyInitialized = CheckFullyInitialized; S.EvaluatingDecl = VD; S.setEvalLocation(VD->getLocation()); EvalResult.setSource(VD); - if (const Expr *Init = VD->getAnyInitializer()) { + // FIXME: I think Init is never null. 
+ if (Init) { QualType T = VD->getType(); this->ConvertResultToRValue = !Init->isGLValue() && !T->isPointerType() && !T->isObjCObjectPointerType(); @@ -65,7 +66,7 @@ EvaluationResult EvalEmitter::interpretDecl(const VarDecl *VD, EvalResult.setSource(VD); - if (!this->visitDeclAndReturn(VD, S.inConstantContext())) + if (!this->visitDeclAndReturn(VD, Init, S.inConstantContext())) EvalResult.setInvalid(); S.EvaluatingDecl = nullptr; diff --git a/clang/lib/AST/ByteCode/EvalEmitter.h b/clang/lib/AST/ByteCode/EvalEmitter.h index 85a0a99fbb4b0..e81ea67adf97a 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.h +++ b/clang/lib/AST/ByteCode/EvalEmitter.h @@ -37,7 +37,8 @@ class EvalEmitter : public SourceMapper { EvaluationResult interpretExpr(const Expr *E, bool ConvertResultToRValue = false, bool DestroyToplevelScope = false); - EvaluationResult interpretDecl(const VarDecl *VD, bool CheckFullyInitialized); + EvaluationResult interpretDecl(const VarDecl *VD, const Expr *Init, + bool CheckFullyInitialized); /// Interpret the given Expr to a Pointer. EvaluationResult interpretAsPointer(const Expr *E, PtrCallback PtrCB); /// Interpret the given expression as if it was in the body of the given @@ -59,7 +60,8 @@ class EvalEmitter : public SourceMapper { /// Methods implemented by the compiler. 
virtual bool visitExpr(const Expr *E, bool DestroyToplevelScope) = 0; - virtual bool visitDeclAndReturn(const VarDecl *VD, bool ConstantContext) = 0; + virtual bool visitDeclAndReturn(const VarDecl *VD, const Expr *Init, + bool ConstantContext) = 0; virtual bool visitFunc(const FunctionDecl *F) = 0; virtual bool visit(const Expr *E) = 0; virtual bool emitBool(bool V, const Expr *E) = 0; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5145896930153..820b053057067 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -17755,7 +17755,7 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, if (Info.EnableNewConstInterp) { auto &InterpCtx = const_cast(Ctx).getInterpContext(); - if (!InterpCtx.evaluateAsInitializer(Info, VD, Value)) + if (!InterpCtx.evaluateAsInitializer(Info, VD, this, Value)) return false; return CheckConstantExpression(Info, DeclLoc, DeclTy, Value, diff --git a/clang/test/Modules/added-visible-decls.cppm b/clang/test/Modules/added-visible-decls.cppm index 2f387db452905..28df3bf6f8543 100644 --- a/clang/test/Modules/added-visible-decls.cppm +++ b/clang/test/Modules/added-visible-decls.cppm @@ -5,6 +5,7 @@ // RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface -o %t/b.pcm -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-reduced-module-interface -o %t/c.pcm -fprebuilt-module-path=%t // RUN: %clang_cc1 -std=c++20 %t/d.cpp -fprebuilt-module-path=%t -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fprebuilt-module-path=%t -fsyntax-only -verify -fexperimental-new-constant-interpreter //--- a.h template From 6931bad36c0ddae441d115814022fd5d3cbc554b Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 15 Sep 2025 08:20:24 +0200 Subject: [PATCH 296/734] [clang][bytecode] Fix bit casts to IntAP types (#158509) They were left out. 
Fixes #153920 --- .../lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 28 +++++++++---- clang/test/AST/ByteCode/builtin-bit-cast.cpp | 41 +++++++++++++++++++ 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index feac97d4b1a69..4bd9c66fc9974 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -441,13 +441,27 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, if (llvm::sys::IsBigEndianHost) swapBytes(Memory.get(), FullBitWidth.roundToBytes()); - BITCAST_TYPE_SWITCH_FIXED_SIZE(T, { - if (BitWidth.nonZero()) - P.deref() = T::bitcastFromMemory(Memory.get(), T::bitWidth()) - .truncate(BitWidth.getQuantity()); - else - P.deref() = T::zero(); - }); + if (T == PT_IntAPS) { + P.deref>() = + S.allocAP>(FullBitWidth.getQuantity()); + IntegralAP::bitcastFromMemory(Memory.get(), + FullBitWidth.getQuantity(), + &P.deref>()); + } else if (T == PT_IntAP) { + P.deref>() = + S.allocAP>(FullBitWidth.getQuantity()); + IntegralAP::bitcastFromMemory(Memory.get(), + FullBitWidth.getQuantity(), + &P.deref>()); + } else { + BITCAST_TYPE_SWITCH_FIXED_SIZE(T, { + if (BitWidth.nonZero()) + P.deref() = T::bitcastFromMemory(Memory.get(), T::bitWidth()) + .truncate(BitWidth.getQuantity()); + else + P.deref() = T::zero(); + }); + } P.initialize(); return true; }); diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp index bc356b0b6e122..fede780fd66ec 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp @@ -529,3 +529,44 @@ constexpr const intptr_t &returns_local() { return 0L; } // both-error@+2 {{constexpr variable 'test_nullptr_bad' must be initialized by a constant expression}} // both-note@+1 {{read of temporary whose lifetime has ended}} constexpr nullptr_t test_nullptr_bad = 
__builtin_bit_cast(nullptr_t, returns_local()); + +namespace VectorCast { + typedef unsigned X __attribute__ ((vector_size (64))); + typedef unsigned __int128 Y __attribute__ ((vector_size (64))); + constexpr int test() { + X x = {0}; + Y y = x; + + X x2 = y; + + return 0; + } + static_assert(test() == 0); + + typedef int X2 __attribute__ ((vector_size (64))); + typedef __int128 Y2 __attribute__ ((vector_size (64))); + constexpr int test2() { + X2 x = {0}; + Y2 y = x; + + X2 x2 = y; + + return 0; + } + static_assert(test2() == 0); + + struct S { + unsigned __int128 a : 3; + }; + constexpr S s = __builtin_bit_cast(S, (__int128)12); // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{constexpr bit_cast involving bit-field is not yet supported}} \ + // ref-note {{declared here}} +#if LITTLE_END + static_assert(s.a == 4); // ref-error {{not an integral constant expression}} \ + // ref-note {{initializer of 's' is not a constant expression}} +#else + static_assert(s.a == 0); // ref-error {{not an integral constant expression}} \ + // ref-note {{initializer of 's' is not a constant expression}} +#endif + +} From 1c21d5cb9b8e48ab928919a6f358eba8ffd3b49c Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 15 Sep 2025 07:32:00 +0100 Subject: [PATCH 297/734] [GlobalISel] Remove GI known bits cache (#157352) There is a cache on the known-bit computed by global-isel. It only works inside a single query to computeKnownBits, which limits its usefulness, and according to the tests can sometimes limit the effectiveness of known-bits queries. (Although some AMD tests look longer). Keeping the cache valid and clearing it at the correct times can also require being careful about the functions called inside known-bits queries. 
I measured compile-time of removing it and came up with: ``` 7zip 2.06405E+11 2.06436E+11 0.015018992 Bullet 1.01298E+11 1.01186E+11 -0.110236169 ClamAV 57942466667 57848066667 -0.16292023 SPASS 45444466667 45402966667 -0.091320249 consumer 35432466667 35381233333 -0.144594317 kimwitu++ 40858833333 40927933333 0.169118877 lencod 70022366667 69950633333 -0.102443457 mafft 38439900000 38413233333 -0.069372362 sqlite3 35822266667 35770033333 -0.145812474 tramp3d 82083133333 82045600000 -0.045726 Average -0.068828739 ``` The last column is % difference between with / without the cache. So in total it seems to be costing slightly more to keep the current known-bits cache than if it was removed. (Measured in instruction count, similar to llvm-compile-time-tracker). The hit rate wasn't terrible - higher than I expected. In the llvm-test-suite+external projects it was hit 4791030 times out of 91107008 queries, slightly more than 5%. Note that as globalisel increases in complexity, more known bits calls might be made and the numbers might shift. If that is the case it might be better to have a cache that works across calls, providing it doesn't make effectiveness worse. 
--- .../CodeGen/GlobalISel/GISelValueTracking.h | 2 - .../CodeGen/GlobalISel/GISelValueTracking.cpp | 25 -- llvm/test/CodeGen/AArch64/rem-by-const.ll | 4 +- .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 255 ++++++------ .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 382 +++++++++--------- .../RISCV/GlobalISel/div-by-constant.ll | 6 - .../CodeGen/GlobalISel/KnownBitsTest.cpp | 2 +- .../GlobalISel/KnownBitsVectorTest.cpp | 2 +- 8 files changed, 327 insertions(+), 351 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h index 490d1a34cc846..3bf9d694b1b21 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h @@ -37,8 +37,6 @@ class LLVM_ABI GISelValueTracking : public GISelChangeObserver { const TargetLowering &TL; const DataLayout &DL; unsigned MaxDepth; - /// Cache maintained during a computeKnownBits request. - SmallDenseMap ComputeKnownBitsCache; void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known, const APInt &DemandedElts, unsigned Depth = 0); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 0cf44e02254de..9b4c103763d74 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -93,12 +93,8 @@ KnownBits GISelValueTracking::getKnownBits(Register R) { KnownBits GISelValueTracking::getKnownBits(Register R, const APInt &DemandedElts, unsigned Depth) { - // For now, we only maintain the cache during one request. 
- assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared"); - KnownBits Known; computeKnownBitsImpl(R, Known, DemandedElts, Depth); - ComputeKnownBitsCache.clear(); return Known; } @@ -187,14 +183,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, #endif unsigned BitWidth = DstTy.getScalarSizeInBits(); - auto CacheEntry = ComputeKnownBitsCache.find(R); - if (CacheEntry != ComputeKnownBitsCache.end()) { - Known = CacheEntry->second; - LLVM_DEBUG(dbgs() << "Cache hit at "); - LLVM_DEBUG(dumpResult(MI, Known, Depth)); - assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match"); - return; - } Known = KnownBits(BitWidth); // Don't know anything // Depth may get bigger than max depth if it gets passed to a different @@ -254,16 +242,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, // point of the pipeline, otherwise the main live-range will be // defined more than once, which is against SSA. assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?"); - // Record in the cache that we know nothing for MI. - // This will get updated later and in the meantime, if we reach that - // phi again, because of a loop, we will cut the search thanks to this - // cache entry. - // We could actually build up more information on the phi by not cutting - // the search, but that additional information is more a side effect - // than an intended choice. - // Therefore, for now, save on compile time until we derive a proper way - // to derive known bits for PHIs within loops. - ComputeKnownBitsCache[R] = KnownBits(BitWidth); // PHI's operand are a mix of registers and basic blocks interleaved. // We only care about the register ones. for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { @@ -700,9 +678,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, } LLVM_DEBUG(dumpResult(MI, Known, Depth)); - - // Update the cache. 
- ComputeKnownBitsCache[R] = Known; } static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) { diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll index 599fa510d4aea..1cb92e46cbcd1 100644 --- a/llvm/test/CodeGen/AArch64/rem-by-const.ll +++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll @@ -88,7 +88,7 @@ define i8 @ui8_7(i8 %a, i8 %b) { ; CHECK-GI-NEXT: sub w9, w0, w8 ; CHECK-GI-NEXT: ubfx w9, w9, #1, #7 ; CHECK-GI-NEXT: add w8, w9, w8 -; CHECK-GI-NEXT: ubfx w8, w8, #2, #6 +; CHECK-GI-NEXT: lsr w8, w8, #2 ; CHECK-GI-NEXT: lsl w9, w8, #3 ; CHECK-GI-NEXT: sub w8, w9, w8 ; CHECK-GI-NEXT: sub w0, w0, w8 @@ -207,7 +207,7 @@ define i16 @ui16_7(i16 %a, i16 %b) { ; CHECK-GI-NEXT: sub w9, w0, w8 ; CHECK-GI-NEXT: ubfx w9, w9, #1, #15 ; CHECK-GI-NEXT: add w8, w9, w8 -; CHECK-GI-NEXT: ubfx w8, w8, #2, #14 +; CHECK-GI-NEXT: lsr w8, w8, #2 ; CHECK-GI-NEXT: lsl w9, w8, #3 ; CHECK-GI-NEXT: sub w8, w9, w8 ; CHECK-GI-NEXT: sub w0, w0, w8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index f57fc005b994b..9ffc565d9d47a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1186,77 +1186,77 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s6, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v8, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 -; GISEL-NEXT: v_mov_b32_e32 v9, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10] -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: 
v_mul_lo_u32 v13, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v4, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v10, v13 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, v5 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v8, v[10:11] +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v13 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v8, v4 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; GISEL-NEXT: 
v_addc_u32_e32 v17, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v9, v7, vcc ; GISEL-NEXT: v_mov_b32_e32 v4, v14 -; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13 +; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v7, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v7, v13 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15] ; GISEL-NEXT: s_mov_b32 s6, 1 ; GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v16, v14 ; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v16, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v14 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v16, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13 -; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14 -; GISEL-NEXT: v_xor_b32_e32 v19, 
v1, v9 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_xor_b32_e32 v18, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v17 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v14 +; GISEL-NEXT: v_xor_b32_e32 v19, v1, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v15, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc ; GISEL-NEXT: v_mul_lo_u32 v13, v19, v0 ; GISEL-NEXT: v_mul_lo_u32 v14, v18, v1 ; GISEL-NEXT: v_mul_hi_u32 v15, v18, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v19, v0 -; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb +; GISEL-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 @@ -1271,144 +1271,147 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v13 ; GISEL-NEXT: v_mul_hi_u32 v16, v19, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v15, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v13 -; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v7, v16, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v18, v0 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], 0, v15, v[13:14] ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v19, v13, vcc ; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v19, v13 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; GISEL-NEXT: 
v_cndmask_b32_e64 v14, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v15 ; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, v[0:1] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v0 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; 
GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v16, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v10, v15, v13, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc -; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 -; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v10, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v8, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v5 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v10 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: 
v_mul_hi_u32 v6, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3 -; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 -; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v13, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; 
GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v11, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[5:6] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: 
v_cndmask_b32_e64 v4, -1, v5, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v11, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v6 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v6, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v7, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 19dc20c510041..82279e641ed63 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1112,67 +1112,67 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s6, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: 
v_trunc_f32_e32 v8, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 -; GISEL-NEXT: v_mov_b32_e32 v9, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10] -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v4, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v10, v13 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, v5 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v8, v[10:11] +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v13 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; GISEL-NEXT: 
v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v8, v4 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc ; GISEL-NEXT: v_mov_b32_e32 v4, v14 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] ; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15] ; GISEL-NEXT: s_mov_b32 s6, 1 ; GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14 +; GISEL-NEXT: v_mul_lo_u32 v7, v16, v14 ; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v16, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13 +; GISEL-NEXT: v_mul_hi_u32 v7, v17, v13 ; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 
v18, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v7 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v18, v0, v7 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14 -; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9 +; GISEL-NEXT: v_xor_b32_e32 v19, v1, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 @@ -1195,13 +1195,14 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_mul_hi_u32 v15, v19, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v1 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], 0, v15, v[13:14] ; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v18, v0 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13 ; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc @@ -1217,94 +1218,96 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], 
v6, v9, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v0 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc -; 
GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc -; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 -; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v10, v7 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v8, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v5 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v10 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3 -; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 -; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9 -; GISEL-NEXT: 
v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v11, v7 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v13, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v7 +; GISEL-NEXT: v_mad_u64_u32 
v[5:6], s[4:5], 0, v9, v[5:6] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v7, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 @@ -1327,10 +1330,10 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_pow2k_denom: @@ -1705,67 +1708,67 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s6, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v8, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 -; GISEL-NEXT: v_mov_b32_e32 v9, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10] -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v4, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v10, v13 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, v5 +; GISEL-NEXT: 
v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v8, v[10:11] +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v13 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v8, v4 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc ; GISEL-NEXT: v_mov_b32_e32 v4, v14 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] ; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15] ; GISEL-NEXT: s_mov_b32 s6, 1 ; GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GISEL-NEXT: v_mul_lo_u32 
v9, v16, v14 +; GISEL-NEXT: v_mul_lo_u32 v7, v16, v14 ; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v16, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13 +; GISEL-NEXT: v_mul_hi_u32 v7, v17, v13 ; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v7 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v18, v0, v7 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14 -; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9 +; GISEL-NEXT: v_xor_b32_e32 v19, v1, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 @@ -1788,13 +1791,14 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, 
v0, v13 -; GISEL-NEXT: v_mul_hi_u32 v15, v19, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v1 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 ; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], 0, v15, v[13:14] ; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v18, v0 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13 ; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc @@ -1810,94 +1814,96 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v0 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; GISEL-NEXT: 
v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc -; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 -; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v10, v7 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v8, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v5 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v10 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GISEL-NEXT: v_cndmask_b32_e64 
v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3 -; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 -; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v11, v7 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 
v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v13, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v7 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], 0, v9, v[5:6] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v7, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 @@ -1920,10 +1926,10 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_oddk_denom: diff --git 
a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll index 6864afe3855f4..225ceed9627b7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll @@ -240,7 +240,6 @@ define i8 @udiv8_constant_add(i8 %a) nounwind { ; RV32-NEXT: zext.b a0, a0 ; RV32-NEXT: srli a0, a0, 1 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: zext.b a0, a0 ; RV32-NEXT: srli a0, a0, 2 ; RV32-NEXT: ret ; @@ -254,7 +253,6 @@ define i8 @udiv8_constant_add(i8 %a) nounwind { ; RV64-NEXT: zext.b a0, a0 ; RV64-NEXT: srli a0, a0, 1 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: zext.b a0, a0 ; RV64-NEXT: srli a0, a0, 2 ; RV64-NEXT: ret %1 = udiv i8 %a, 7 @@ -317,7 +315,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV32IM-NEXT: and a0, a0, a2 ; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: add a0, a0, a1 -; RV32IM-NEXT: and a0, a0, a2 ; RV32IM-NEXT: srli a0, a0, 2 ; RV32IM-NEXT: ret ; @@ -332,7 +329,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV32IMZB-NEXT: zext.h a0, a0 ; RV32IMZB-NEXT: srli a0, a0, 1 ; RV32IMZB-NEXT: add a0, a0, a1 -; RV32IMZB-NEXT: zext.h a0, a0 ; RV32IMZB-NEXT: srli a0, a0, 2 ; RV32IMZB-NEXT: ret ; @@ -349,7 +345,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: srli a0, a0, 1 ; RV64IM-NEXT: add a0, a0, a1 -; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: srli a0, a0, 2 ; RV64IM-NEXT: ret ; @@ -364,7 +359,6 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV64IMZB-NEXT: zext.h a0, a0 ; RV64IMZB-NEXT: srli a0, a0, 1 ; RV64IMZB-NEXT: add a0, a0, a1 -; RV64IMZB-NEXT: zext.h a0, a0 ; RV64IMZB-NEXT: srli a0, a0, 2 ; RV64IMZB-NEXT: ret %1 = udiv i16 %a, 7 diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp index 089fb00d6080d..8563d7f1f15c9 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp +++ 
b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp @@ -190,7 +190,7 @@ TEST_F(AArch64GISelMITest, TestKnownBitsDecreasingCstPHIWithLoop) { // Therefore, %14's known zero are 0x80 shifted by one 0xC0. // If we had simulated the loop we could have more zero bits, basically // up to 0xFC (count leading zero of 5, + 1). - EXPECT_EQ((uint64_t)0xC0, Res.Zero.getZExtValue()); + EXPECT_EQ((uint64_t)0xFC, Res.Zero.getZExtValue()); KnownBits Res2 = Info.getKnownBits(DstReg); EXPECT_EQ(Res.One.getZExtValue(), Res2.One.getZExtValue()); diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp index 73ddf0c88d3ed..6b70ae9739179 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp @@ -220,7 +220,7 @@ TEST_F(AArch64GISelMITest, TestKnownBitsVectorDecreasingCstPHIWithLoop) { GISelValueTracking Info(*MF, /*MaxDepth=*/24); KnownBits Res = Info.getKnownBits(SrcReg); EXPECT_EQ((uint64_t)0, Res.One.getZExtValue()); - EXPECT_EQ((uint64_t)0xC0, Res.Zero.getZExtValue()); + EXPECT_EQ((uint64_t)0xFC, Res.Zero.getZExtValue()); KnownBits Res2 = Info.getKnownBits(DstReg); EXPECT_EQ(Res.One.getZExtValue(), Res2.One.getZExtValue()); From 7b0d91002691e6fe4ad421682f7d2a12c21d9055 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 15 Sep 2025 08:33:34 +0200 Subject: [PATCH 298/734] [clang][bytecode] Check for dummy pointers in CopyArray op (#158543) They can't be written to or read from. 
Fixes #158535 --- clang/lib/AST/ByteCode/Interp.h | 3 +++ clang/test/AST/ByteCode/vectors.cpp | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 9a7bd03bea077..d8362ee3176a0 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -3156,6 +3156,9 @@ inline bool CopyArray(InterpState &S, CodePtr OpPC, uint32_t SrcIndex, const auto &SrcPtr = S.Stk.pop(); const auto &DestPtr = S.Stk.peek(); + if (SrcPtr.isDummy() || DestPtr.isDummy()) + return false; + for (uint32_t I = 0; I != Size; ++I) { const Pointer &SP = SrcPtr.atIndex(SrcIndex + I); diff --git a/clang/test/AST/ByteCode/vectors.cpp b/clang/test/AST/ByteCode/vectors.cpp index 91fec8f86f613..2eb615e906cf5 100644 --- a/clang/test/AST/ByteCode/vectors.cpp +++ b/clang/test/AST/ByteCode/vectors.cpp @@ -168,3 +168,15 @@ namespace Assign { } static_assert(invalid()); // both-error {{not an integral constant expression}} } + +namespace CopyArrayDummy { + struct S { + long a, b, c, d; + }; + typedef long T __attribute__((vector_size(4 * sizeof(long)))); + + void foo(void) { + struct S s; + *(T *)&s = (T){0, 1, 2, 3}; + } +} From d69238013fa10c3027975874b283b08a25e90fd0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 14 Sep 2025 23:42:44 -0700 Subject: [PATCH 299/734] [ADT] Define countr_zero in terms of popcount (NFC) (#158519) We can express the fallback mechanism of llvm::countr_zero a lot more concisely with llvm::popcount. Since llvm::countr_zero now requires llvm::popcount, this patch moves llvm::popcount earlier. 
--- llvm/include/llvm/ADT/bit.h | 74 ++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h index 2ca9b43519740..67c0a1c3300fa 100644 --- a/llvm/include/llvm/ADT/bit.h +++ b/llvm/include/llvm/ADT/bit.h @@ -148,6 +148,35 @@ template >> return (Value != 0) && ((Value & (Value - 1)) == 0); } +/// Count the number of set bits in a value. +/// Ex. popcount(0xF000F000) = 8 +/// Returns 0 if Value is zero. +template [[nodiscard]] inline int popcount(T Value) noexcept { + static_assert(std::is_unsigned_v, "T must be an unsigned integer type"); + static_assert(sizeof(T) <= 8, "T must be 8 bytes or less"); + + if constexpr (sizeof(T) <= 4) { +#if defined(__GNUC__) + return (int)__builtin_popcount(Value); +#else + uint32_t V = Value; + V = V - ((V >> 1) & 0x55555555); + V = (V & 0x33333333) + ((V >> 2) & 0x33333333); + return int(((V + (V >> 4) & 0xF0F0F0F) * 0x1010101) >> 24); +#endif + } else { +#if defined(__GNUC__) + return (int)__builtin_popcountll(Value); +#else + uint64_t V = Value; + V = V - ((V >> 1) & 0x5555555555555555ULL); + V = (V & 0x3333333333333333ULL) + ((V >> 2) & 0x3333333333333333ULL); + V = (V + (V >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return int((uint64_t)(V * 0x0101010101010101ULL) >> 56); +#endif + } +} + /// Count number of 0's from the least significant bit to the most /// stopping at the first 1. /// @@ -179,19 +208,9 @@ template [[nodiscard]] int countr_zero(T Val) { #endif } - // Fall back to the bisection method. - unsigned ZeroBits = 0; - T Shift = std::numeric_limits::digits >> 1; - T Mask = std::numeric_limits::max() >> Shift; - while (Shift) { - if ((Val & Mask) == 0) { - Val >>= Shift; - ZeroBits |= Shift; - } - Shift >>= 1; - Mask >>= Shift; - } - return ZeroBits; + // Fallback to popcount. "(Val & -Val) - 1" is a bitmask with all bits below + // the least significant 1 set. 
+ return llvm::popcount(static_cast>((Val & -Val) - 1)); } /// Count number of 0's from the most significant bit to the least @@ -300,35 +319,6 @@ template [[nodiscard]] T bit_ceil(T Value) { return T(1) << llvm::bit_width(Value - 1u); } -/// Count the number of set bits in a value. -/// Ex. popcount(0xF000F000) = 8 -/// Returns 0 if the word is zero. -template >> -[[nodiscard]] inline int popcount(T Value) noexcept { - if constexpr (sizeof(T) <= 4) { -#if defined(__GNUC__) - return (int)__builtin_popcount(Value); -#else - uint32_t v = Value; - v = v - ((v >> 1) & 0x55555555); - v = (v & 0x33333333) + ((v >> 2) & 0x33333333); - return int(((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24); -#endif - } else if constexpr (sizeof(T) <= 8) { -#if defined(__GNUC__) - return (int)__builtin_popcountll(Value); -#else - uint64_t v = Value; - v = v - ((v >> 1) & 0x5555555555555555ULL); - v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); - v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; - return int((uint64_t)(v * 0x0101010101010101ULL) >> 56); -#endif - } else { - static_assert(sizeof(T) == 0, "T must be 8 bytes or less"); - } -} - // Forward-declare rotr so that rotl can use it. 
template >> [[nodiscard]] constexpr T rotr(T V, int R); From 24b58678bcde7d33117941b443f9bcd0fe67767a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 15 Sep 2025 08:49:47 +0200 Subject: [PATCH 300/734] [clang][bytecode] Disable int128 test if unsupported This broke the armv8-quick builder: https://lab.llvm.org/buildbot/#/builders/154/builds/21492 --- clang/test/AST/ByteCode/builtin-bit-cast.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp index fede780fd66ec..32c1f41e0e059 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp @@ -530,6 +530,8 @@ constexpr const intptr_t &returns_local() { return 0L; } // both-note@+1 {{read of temporary whose lifetime has ended}} constexpr nullptr_t test_nullptr_bad = __builtin_bit_cast(nullptr_t, returns_local()); + +#ifdef __SIZEOF_INT128__ namespace VectorCast { typedef unsigned X __attribute__ ((vector_size (64))); typedef unsigned __int128 Y __attribute__ ((vector_size (64))); @@ -568,5 +570,5 @@ namespace VectorCast { static_assert(s.a == 0); // ref-error {{not an integral constant expression}} \ // ref-note {{initializer of 's' is not a constant expression}} #endif - } +#endif From c60972a2cf16faba38844c82addeb78c893d5b3b Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 15 Sep 2025 09:19:26 +0200 Subject: [PATCH 301/734] [clang][bytecode][NFC] Surround Pointer diagram in \verbatim (#158550) See if this fixes the documentation. 
--- clang/lib/AST/ByteCode/Pointer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index 49d701c3e27b6..9e67374c3354f 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -75,7 +75,7 @@ enum class Storage { Block, Int, Fn, Typeid }; /// data the pointer decribes can be found at /// Pointee->rawData() + Pointer.Offset. /// -/// +/// \verbatim /// Pointee Offset /// │ │ /// │ │ @@ -87,6 +87,7 @@ enum class Storage { Block, Int, Fn, Typeid }; /// │ /// │ /// Base +/// \endverbatim class Pointer { private: static constexpr unsigned PastEndMark = ~0u; From 02d3e6ac75e776041fb1782efc4dfccfe6b46218 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 15 Sep 2025 09:33:12 +0200 Subject: [PATCH 302/734] [HIP][Clang] Remove __AMDGCN_WAVEFRONT_SIZE macros (#157463) Remove definitions, test uses, and documentation of the macros, which were deprecated in November 2024 with PR #112849 / #115507. Where required, the wavefront size should instead be queried via means provided by the HIP runtime: the (non-constexpr) `warpSize` variable in device code, or `hipGetDeviceProperties` in host code. This change passed AMD-internal testing. Implements SWDEV-522062. 
--- clang/docs/AMDGPUSupport.rst | 4 - clang/docs/HIPSupport.rst | 3 +- clang/lib/Basic/Targets/AMDGPU.cpp | 6 - .../CodeGenHIP/maybe_undef-attr-verify.hip | 2 +- .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 6 +- .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 4 - clang/test/Driver/amdgpu-macros.cl | 16 --- clang/test/Driver/hip-macros.hip | 23 ---- ...wavefront-size-deprecation-diagnostics.hip | 115 ------------------ .../Preprocessor/predefined-arch-macros.c | 2 - 10 files changed, 3 insertions(+), 178 deletions(-) delete mode 100644 clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst index 3eada5f900613..18e3de8abe92a 100644 --- a/clang/docs/AMDGPUSupport.rst +++ b/clang/docs/AMDGPUSupport.rst @@ -49,10 +49,6 @@ Predefined Macros - Defined as 1 if the CU mode is enabled and 0 if the WGP mode is enabled. * - ``__AMDGCN_UNSAFE_FP_ATOMICS__`` - Defined if unsafe floating-point atomics are allowed. - * - ``__AMDGCN_WAVEFRONT_SIZE__`` - - Defines the wavefront size. Allowed values are 32 and 64 (deprecated). - * - ``__AMDGCN_WAVEFRONT_SIZE`` - - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated). * - ``__HAS_FMAF__`` - Defined if FMAF instruction is available (deprecated). * - ``__HAS_LDEXPF__`` diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst index b4a671e3cfa3c..0d04b842af025 100644 --- a/clang/docs/HIPSupport.rst +++ b/clang/docs/HIPSupport.rst @@ -178,8 +178,7 @@ Predefined Macros - Alias to ``__HIP_API_PER_THREAD_DEFAULT_STREAM__``. Deprecated. Note that some architecture specific AMDGPU macros will have default values when -used from the HIP host compilation. Other :doc:`AMDGPU macros ` -like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example. +used from the HIP host compilation. 
Compilation Modes ================= diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 87de9e6865e71..443dfbc93a182 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -356,12 +356,6 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, if (hasFastFMA()) Builder.defineMacro("FP_FAST_FMA"); - Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize), - "compile-time-constant access to the wavefront size will " - "be removed in a future release"); - Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize), - "compile-time-constant access to the wavefront size will " - "be removed in a future release"); Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); } diff --git a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip index 571fba148f5cc..6dc57c4fcc5fc 100644 --- a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip +++ b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip @@ -20,7 +20,7 @@ #define __maybe_undef __attribute__((maybe_undef)) #define WARP_SIZE 64 -static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE__; +static constexpr int warpSize = WARP_SIZE; __device__ static inline unsigned int __lane_id() { return __builtin_amdgcn_mbcnt_hi( diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index d390418523694..31fd0e7bceaf5 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s // RUN: 
%clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck -enable-var-scope %s // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s @@ -48,7 +48,3 @@ void test_read_exec_lo(global uint* out) { void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); } - -#if __AMDGCN_WAVEFRONT_SIZE != 32 -#error Wrong wavesize detected -#endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index d851ec7e6734f..758b5aa532d73 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -50,7 +50,3 @@ void test_read_exec_lo(global ulong* out) { void test_read_exec_hi(global ulong* out) { *out = __builtin_amdgcn_read_exec_hi(); } - -#if defined(__AMDGCN_WAVEFRONT_SIZE__) && __AMDGCN_WAVEFRONT_SIZE__ != 64 -#error Wrong wavesize detected -#endif diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index a60593f2ab9ed..dd6fcc773a32b 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -153,26 +153,10 @@ // ARCH-GCN-DAG: #define __[[CPU]]__ 1 // ARCH-GCN-DAG: #define __[[FAMILY]]__ 1 // ARCH-GCN-DAG: #define __amdgcn_processor__ "[[CPU]]" -// ARCH-GCN-DAG: #define __AMDGCN_WAVEFRONT_SIZE [[WAVEFRONT_SIZE]] // ARCH-GCN-DAG: #define __GCC_DESTRUCTIVE_SIZE 128 // ARCH-GCN-DAG: #define __GCC_CONSTRUCTIVE_SIZE 128 // UNSAFEFPATOMIC-DAG: #define __AMDGCN_UNSAFE_FP_ATOMICS__ 1 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ -// RUN: %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn 
-mcpu=gfx1010 -mwavefrontsize64 \ -// RUN: %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ -// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ -// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE32 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mno-wavefrontsize64 \ -// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 \ -// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 -// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 - // RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 \ // RUN: %s 2>&1 | FileCheck --check-prefix=CUMODE-ON %s // RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mcumode \ diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip index 516e01a6c4743..4c460d50bf39a 100644 --- a/clang/test/Driver/hip-macros.hip +++ b/clang/test/Driver/hip-macros.hip @@ -1,27 +1,4 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ -// RUN: --cuda-device-only -nogpuinc -nogpulib \ -// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ -// RUN: --cuda-device-only -nogpuinc -nogpulib \ -// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ -// RUN: --cuda-device-only -nogpuinc -nogpulib \ -// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ -// RUN: --cuda-device-only -nogpuinc -nogpulib \ -// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE32 %s -// RUN: %clang -E -dM --offload-arch=gfx906 -mno-wavefrontsize64 \ 
-// RUN: --cuda-device-only -nogpuinc -nogpulib \ -// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \ -// RUN: --cuda-device-only -nogpuinc -nogpulib \ -// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 64 -// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 32 -// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 -// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 - // RUN: %clang -E -dM --offload-arch=gfx906 --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefix=CUMODE-ON %s // RUN: %clang -E -dM --offload-arch=gfx906 --cuda-device-only -nogpuinc -nogpulib -mcumode \ diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip deleted file mode 100644 index 8a60f5a150048..0000000000000 --- a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip +++ /dev/null @@ -1,115 +0,0 @@ -// REQUIRES: amdgpu-registered-target -// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s -// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s - -// Test that deprecation warnings for the wavefront size macro are emitted properly. 
- -#define WRAPPED __AMDGCN_WAVEFRONT_SIZE__ - -#define DOUBLE_WRAPPED (WRAPPED) - -template struct my_enable_if {}; - -template struct my_enable_if { - typedef T type; -}; - -__attribute__((host, device)) void use(int, const char*); - -template __attribute__((host, device)) int templatify(int x) { - return x + N; -} - -__attribute__((device)) const int GlobalConst = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -constexpr int GlobalConstExpr = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - -#if defined(__HIP_DEVICE_COMPILE__) && (__AMDGCN_WAVEFRONT_SIZE__ == 64) // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -int foo(void); -#endif - -__attribute__((device)) int device_var = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - -__attribute__((device)) -void device_fun() { - use(__AMDGCN_WAVEFRONT_SIZE, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} - use(__AMDGCN_WAVEFRONT_SIZE__, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(WRAPPED, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(DOUBLE_WRAPPED, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(GlobalConst, "device function"); - use(GlobalConstExpr, "device function"); -} - -__attribute__((global)) -void global_fun() { - // no warnings expected - use(__AMDGCN_WAVEFRONT_SIZE, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as 
deprecated}} - use(__AMDGCN_WAVEFRONT_SIZE__, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(WRAPPED, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(DOUBLE_WRAPPED, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -} - -int host_var = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -int host_var_alt = __AMDGCN_WAVEFRONT_SIZE; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} -int host_var_wrapped = WRAPPED; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -int host_var_double_wrapped = DOUBLE_WRAPPED; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - -__attribute__((host)) -void host_fun() { - use(__AMDGCN_WAVEFRONT_SIZE, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} - use(__AMDGCN_WAVEFRONT_SIZE__, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(WRAPPED, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(DOUBLE_WRAPPED, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(GlobalConst, "host function"); - use(GlobalConstExpr, "host function"); -} - -__attribute((host, device)) -void host_device_fun() { - use(__AMDGCN_WAVEFRONT_SIZE__, "host device 
function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(WRAPPED, "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(DOUBLE_WRAPPED, "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -} - -template // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -class FunSelector { -public: - template - __attribute__((device)) - auto fun(void) - -> typename my_enable_if<(FunWarpSize <= __AMDGCN_WAVEFRONT_SIZE__), void>::type // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - { - use(1, "yay!"); - } - - template - __attribute__((device)) - auto fun(void) - -> typename my_enable_if<(FunWarpSize > __AMDGCN_WAVEFRONT_SIZE__), void>::type // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - { - use(0, "nay!"); - } -}; - -__attribute__((device)) -void device_fun_selector_user() { - FunSelector<> f; - f.fun<>(); - f.fun<1>(); - f.fun<1000>(); - - my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type x = 42; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -} - -__attribute__((device)) my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type DeviceFunTemplateRet(void) { // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - return 42; -} - -__attribute__((device)) int DeviceFunTemplateArg(my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type x) { // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - return x; -} - -// expected-note@* 0+ {{macro marked 'deprecated' here}} diff --git 
a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index ecddf130a5c51..ebdfc8b79e063 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4410,7 +4410,6 @@ // CHECK_AMDGCN_NONE-NOT: #define __HAS_FMAF__ // CHECK_AMDGCN_NONE-NOT: #define __HAS_FP64__ // CHECK_AMDGCN_NONE-NOT: #define __HAS_LDEXPF__ -// CHECK_AMDGCN_NONE-NOT: #define __AMDGCN_WAVEFRONT_SIZE__ // Begin r600 tests ---------------- @@ -4431,7 +4430,6 @@ // RUN: %clang -x hip -E -dM %s -o - 2>&1 --offload-host-only -nogpulib \ // RUN: -nogpuinc --offload-arch=gfx803 -target x86_64-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_HIP_HOST -// CHECK_HIP_HOST: #define __AMDGCN_WAVEFRONT_SIZE__ 64 // CHECK_HIP_HOST: #define __AMDGPU__ 1 // CHECK_HIP_HOST: #define __AMD__ 1 From 3371375131c645bb579323b60e92f2b0f9079f24 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 15 Sep 2025 09:34:04 +0200 Subject: [PATCH 303/734] [InstCombine] Read-only call without return can capture (#157878) The copied from constant memory analysis had a special case where nocapture was not required for read-only calls without (or unused) return. This is not correct, as the address can still be captured though means other than memory and the return value, for example using divergence. This code should not be trying to do its own nocapture inference. 
--- .../InstCombineLoadStoreAlloca.cpp | 4 +-- .../InstCombine/memcpy-from-global.ll | 27 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4b10586616c29..53e77e6cc5c31 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -107,8 +107,8 @@ isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, // a load (but one that potentially returns the value itself), so we can // ignore it if we know that the value isn't captured. bool NoCapture = Call->doesNotCapture(DataOpNo); - if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) || - (Call->onlyReadsMemory(DataOpNo) && NoCapture)) + if (NoCapture && + (Call->onlyReadsMemory() || Call->onlyReadsMemory(DataOpNo))) continue; } diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll index ff85d827bdcb4..f10ba1e3d27e6 100644 --- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll +++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll @@ -139,13 +139,14 @@ define void @test2_addrspacecast() { ret void } -declare void @bar(ptr) -declare void @bar_as1(ptr addrspace(1)) +declare void @bar(ptr nocapture) +declare void @bar_may_capture(ptr) +declare void @bar_as1(ptr addrspace(1) nocapture) ;; Should be able to eliminate the alloca. -define void @test3() { -; CHECK-LABEL: @test3( +define void @test3_nocapture() { +; CHECK-LABEL: @test3_nocapture( ; CHECK-NEXT: call void @bar(ptr nonnull @G) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: ret void ; @@ -155,6 +156,20 @@ define void @test3() { ret void } +; Can not eliminate the alloca, as the function may capture its address. 
+define void @test3_may_capture() { +; CHECK-LABEL: @test3_may_capture( +; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(124) [[A]], ptr noundef nonnull align 16 dereferenceable(124) @G, i64 124, i1 false) +; CHECK-NEXT: call void @bar_may_capture(ptr nonnull [[A]]) #[[ATTR3]] +; CHECK-NEXT: ret void +; + %A = alloca %T + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @G, i64 124, i1 false) + call void @bar_may_capture(ptr %A) readonly + ret void +} + define void @test3_addrspacecast() { ; CHECK-LABEL: @test3_addrspacecast( ; CHECK-NEXT: call void @bar(ptr nonnull @G) #[[ATTR3]] @@ -395,12 +410,12 @@ define void @memcpy_to_capturing_readonly() { ; CHECK-LABEL: @memcpy_to_capturing_readonly( ; CHECK-NEXT: [[A:%.*]] = alloca [[U:%.*]], align 16 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(20) [[A]], ptr noundef nonnull align 16 dereferenceable(20) @H, i64 20, i1 false) -; CHECK-NEXT: call void @bar(ptr nonnull readonly [[A]]) +; CHECK-NEXT: call void @bar_may_capture(ptr nonnull readonly [[A]]) ; CHECK-NEXT: ret void ; %A = alloca %U, align 16 call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr align 4 @H, i64 20, i1 false) - call void @bar(ptr readonly %A) + call void @bar_may_capture(ptr readonly %A) ret void } From 0ee7c9434a4745a10cb68217134a356b63a346f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Mon, 15 Sep 2025 08:35:03 +0100 Subject: [PATCH 304/734] [mlir][vector] Tidy-up testing for to/from_elements unrolling (#158309) 1. Remove `TestUnrollVectorToElements` and `TestUnrollVectorFromElements` test passes - these are not required. 2. Make "vector-from-elements-lowering.mlir" use TD Op for testing (for consistency "vector-to-elements-lowering.mlir" and to make sure that the TD Op, `transform.apply_patterns.vector.unroll_from_elements`, is tested). 3. 
Unify `CHECK` prefixes (`CHECK-UNROLL` -> `CHECK`). 4. Rename `@to_elements_1d` as `@negative_unroll_to_elements_1d`, for consistency with it's counterpart for `vector.from_elements` and to align with our testing guide (*). (*) https://mlir.llvm.org/getting_started/TestingGuide/#after-step-3-add-the-newly-identified-missing-case --- .../Dialect/Vector/td/unroll-elements.mlir | 9 ++-- .../Vector/td/xfer-drop-unit-dims.mlir | 1 + .../Vector/vector-from-elements-lowering.mlir | 49 ++++++++++--------- .../Vector/vector-to-elements-lowering.mlir | 15 ++++-- .../Dialect/Vector/TestVectorTransforms.cpp | 48 ------------------ 5 files changed, 42 insertions(+), 80 deletions(-) diff --git a/mlir/test/Dialect/Vector/td/unroll-elements.mlir b/mlir/test/Dialect/Vector/td/unroll-elements.mlir index 40a90a33b0ac4..f7c69b503a561 100644 --- a/mlir/test/Dialect/Vector/td/unroll-elements.mlir +++ b/mlir/test/Dialect/Vector/td/unroll-elements.mlir @@ -1,11 +1,14 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @unroll_to_elements(%module_op: !transform.any_op {transform.readonly}) { - %f = transform.structured.match ops{["func.func"]} in %module_op + + %func_op = transform.structured.match ops{["func.func"]} in %module_op : (!transform.any_op) -> !transform.any_op - transform.apply_patterns to %f { - transform.apply_patterns.vector.transfer_permutation_patterns + transform.apply_patterns to %func_op { + // Test patterns transform.apply_patterns.vector.unroll_to_elements + transform.apply_patterns.vector.unroll_from_elements } : !transform.any_op + transform.yield } } diff --git a/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir b/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir index 5bffa20842b0c..44a823801d1cd 100644 --- a/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir +++ b/mlir/test/Dialect/Vector/td/xfer-drop-unit-dims.mlir @@ -3,6 +3,7 @@ module @transforms attributes { transform.with_named_sequence } { %func_op = 
transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.op<"func.func"> transform.apply_patterns to %func_op { + // Test patterns transform.apply_patterns.vector.drop_inner_most_unit_dims_from_xfer_ops } : !transform.op<"func.func"> diff --git a/mlir/test/Dialect/Vector/vector-from-elements-lowering.mlir b/mlir/test/Dialect/Vector/vector-from-elements-lowering.mlir index 8fac608ed5692..0d1bc662d09a0 100644 --- a/mlir/test/Dialect/Vector/vector-from-elements-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-from-elements-lowering.mlir @@ -1,33 +1,34 @@ -// RUN: mlir-opt %s -test-unroll-vector-from-elements | FileCheck %s --check-prefix=CHECK-UNROLL +// RUN: mlir-opt %s -transform-preload-library='transform-library-paths=%p/td/unroll-elements.mlir' \ +// RUN: -transform-interpreter=entry-point=unroll_to_elements | FileCheck %s //===----------------------------------------------------------------------===// // Test UnrollFromElements. //===----------------------------------------------------------------------===// -// CHECK-UNROLL-LABEL: @unroll_from_elements_2d -// CHECK-UNROLL-SAME: (%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32) -// CHECK-UNROLL-NEXT: %[[UNDEF_RES:.*]] = ub.poison : vector<2x2xf32> -// CHECK-UNROLL-NEXT: %[[VEC_0:.*]] = vector.from_elements %[[ARG0]], %[[ARG1]] : vector<2xf32> -// CHECK-UNROLL-NEXT: %[[RES_0:.*]] = vector.insert %[[VEC_0]], %[[UNDEF_RES]] [0] : vector<2xf32> into vector<2x2xf32> -// CHECK-UNROLL-NEXT: %[[VEC_1:.*]] = vector.from_elements %[[ARG2]], %[[ARG3]] : vector<2xf32> -// CHECK-UNROLL-NEXT: %[[RES_1:.*]] = vector.insert %[[VEC_1]], %[[RES_0]] [1] : vector<2xf32> into vector<2x2xf32> -// CHECK-UNROLL-NEXT: return %[[RES_1]] : vector<2x2xf32> +// CHECK-LABEL: @unroll_from_elements_2d +// CHECK-SAME: (%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32) +// CHECK-NEXT: %[[UNDEF_RES:.*]] = ub.poison : vector<2x2xf32> +// CHECK-NEXT: 
%[[VEC_0:.*]] = vector.from_elements %[[ARG0]], %[[ARG1]] : vector<2xf32> +// CHECK-NEXT: %[[RES_0:.*]] = vector.insert %[[VEC_0]], %[[UNDEF_RES]] [0] : vector<2xf32> into vector<2x2xf32> +// CHECK-NEXT: %[[VEC_1:.*]] = vector.from_elements %[[ARG2]], %[[ARG3]] : vector<2xf32> +// CHECK-NEXT: %[[RES_1:.*]] = vector.insert %[[VEC_1]], %[[RES_0]] [1] : vector<2xf32> into vector<2x2xf32> +// CHECK-NEXT: return %[[RES_1]] : vector<2x2xf32> func.func @unroll_from_elements_2d(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32) -> vector<2x2xf32> { %0 = vector.from_elements %arg0, %arg1, %arg2, %arg3 : vector<2x2xf32> return %0 : vector<2x2xf32> } -// CHECK-UNROLL-LABEL: @unroll_from_elements_3d -// CHECK-UNROLL-SAME: (%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32) -// CHECK-UNROLL-NEXT: %[[UNDEF_RES:.*]] = ub.poison : vector<2x1x2xf32> -// CHECK-UNROLL-NEXT: %[[UNDEF_RANK_2:.*]] = ub.poison : vector<1x2xf32> -// CHECK-UNROLL-NEXT: %[[VEC_0:.*]] = vector.from_elements %[[ARG0]], %[[ARG1]] : vector<2xf32> -// CHECK-UNROLL-NEXT: %[[RANK_2_0:.*]] = vector.insert %[[VEC_0]], %[[UNDEF_RANK_2]] [0] : vector<2xf32> into vector<1x2xf32> -// CHECK-UNROLL-NEXT: %[[RES_0:.*]] = vector.insert %[[RANK_2_0]], %[[UNDEF_RES]] [0] : vector<1x2xf32> into vector<2x1x2xf32> -// CHECK-UNROLL-NEXT: %[[VEC_1:.*]] = vector.from_elements %[[ARG2]], %[[ARG3]] : vector<2xf32> -// CHECK-UNROLL-NEXT: %[[RANK_2_1:.*]] = vector.insert %[[VEC_1]], %[[UNDEF_RANK_2]] [0] : vector<2xf32> into vector<1x2xf32> -// CHECK-UNROLL-NEXT: %[[RES_1:.*]] = vector.insert %[[RANK_2_1]], %[[RES_0]] [1] : vector<1x2xf32> into vector<2x1x2xf32> -// CHECK-UNROLL-NEXT: return %[[RES_1]] : vector<2x1x2xf32> +// CHECK-LABEL: @unroll_from_elements_3d +// CHECK-SAME: (%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32) +// CHECK-NEXT: %[[UNDEF_RES:.*]] = ub.poison : vector<2x1x2xf32> +// CHECK-NEXT: %[[UNDEF_RANK_2:.*]] = ub.poison : vector<1x2xf32> +// CHECK-NEXT: %[[VEC_0:.*]] 
= vector.from_elements %[[ARG0]], %[[ARG1]] : vector<2xf32> +// CHECK-NEXT: %[[RANK_2_0:.*]] = vector.insert %[[VEC_0]], %[[UNDEF_RANK_2]] [0] : vector<2xf32> into vector<1x2xf32> +// CHECK-NEXT: %[[RES_0:.*]] = vector.insert %[[RANK_2_0]], %[[UNDEF_RES]] [0] : vector<1x2xf32> into vector<2x1x2xf32> +// CHECK-NEXT: %[[VEC_1:.*]] = vector.from_elements %[[ARG2]], %[[ARG3]] : vector<2xf32> +// CHECK-NEXT: %[[RANK_2_1:.*]] = vector.insert %[[VEC_1]], %[[UNDEF_RANK_2]] [0] : vector<2xf32> into vector<1x2xf32> +// CHECK-NEXT: %[[RES_1:.*]] = vector.insert %[[RANK_2_1]], %[[RES_0]] [1] : vector<1x2xf32> into vector<2x1x2xf32> +// CHECK-NEXT: return %[[RES_1]] : vector<2x1x2xf32> func.func @unroll_from_elements_3d(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32) -> vector<2x1x2xf32> { %0 = vector.from_elements %arg0, %arg1, %arg2, %arg3 : vector<2x1x2xf32> return %0 : vector<2x1x2xf32> @@ -35,10 +36,10 @@ func.func @unroll_from_elements_3d(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f3 // 1-D vector.from_elements should not be unrolled. 
-// CHECK-UNROLL-LABEL: @negative_unroll_from_elements_1d -// CHECK-UNROLL-SAME: (%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32) -// CHECK-UNROLL-NEXT: %[[RES:.*]] = vector.from_elements %[[ARG0]], %[[ARG1]] : vector<2xf32> -// CHECK-UNROLL-NEXT: return %[[RES]] : vector<2xf32> +// CHECK-LABEL: @negative_unroll_from_elements_1d +// CHECK-SAME: (%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32) +// CHECK-NEXT: %[[RES:.*]] = vector.from_elements %[[ARG0]], %[[ARG1]] : vector<2xf32> +// CHECK-NEXT: return %[[RES]] : vector<2xf32> func.func @negative_unroll_from_elements_1d(%arg0: f32, %arg1: f32) -> vector<2xf32> { %0 = vector.from_elements %arg0, %arg1 : vector<2xf32> return %0 : vector<2xf32> diff --git a/mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir b/mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir index 9ec0d76599c41..c521bf0138f98 100644 --- a/mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-to-elements-lowering.mlir @@ -1,26 +1,31 @@ -// RUN: mlir-opt %s -test-unroll-vector-to-elements -split-input-file | FileCheck %s // RUN: mlir-opt %s -transform-preload-library='transform-library-paths=%p/td/unroll-elements.mlir' \ // RUN: -transform-interpreter=entry-point=unroll_to_elements | FileCheck %s -// CHECK-LABEL: func.func @to_elements_1d( +//===----------------------------------------------------------------------===// +// Test UnrollToElements. +//===----------------------------------------------------------------------===// + +// 1-D vector.from_elements should not be unrolled. 
+ +// CHECK-LABEL: func.func @negative_unroll_to_elements_1d( // CHECK-SAME: %[[ARG0:.+]]: vector<2xf32> // CHECK: %[[RES:.+]]:2 = vector.to_elements %[[ARG0]] : vector<2xf32> // CHECK: return %[[RES]]#0, %[[RES]]#1 -func.func @to_elements_1d(%arg0: vector<2xf32>) -> (f32, f32) { +func.func @negative_unroll_to_elements_1d(%arg0: vector<2xf32>) -> (f32, f32) { %0:2 = vector.to_elements %arg0 : vector<2xf32> return %0#0, %0#1 : f32, f32 } // ----- -// CHECK-LABEL: func.func @to_elements_2d( +// CHECK-LABEL: func.func @unroll_to_elements_2d( // CHECK-SAME: %[[ARG0:.+]]: vector<2x2xf32> // CHECK: %[[VEC0:.+]] = vector.extract %[[ARG0]][0] : vector<2xf32> from vector<2x2xf32> // CHECK: %[[VEC1:.+]] = vector.extract %[[ARG0]][1] : vector<2xf32> from vector<2x2xf32> // CHECK: %[[RES0:.+]]:2 = vector.to_elements %[[VEC0]] : vector<2xf32> // CHECK: %[[RES1:.+]]:2 = vector.to_elements %[[VEC1]] : vector<2xf32> // CHECK: return %[[RES0]]#0, %[[RES0]]#1, %[[RES1]]#0, %[[RES1]]#1 -func.func @to_elements_2d(%arg0: vector<2x2xf32>) -> (f32, f32, f32, f32) { +func.func @unroll_to_elements_2d(%arg0: vector<2x2xf32>) -> (f32, f32, f32, f32) { %0:4 = vector.to_elements %arg0 : vector<2x2xf32> return %0#0, %0#1, %0#2, %0#3 : f32, f32, f32, f32 } diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index c2d184626818f..3360f3b70a817 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -756,50 +756,6 @@ struct TestVectorGatherLowering } }; -struct TestUnrollVectorFromElements - : public PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestUnrollVectorFromElements) - - StringRef getArgument() const final { - return "test-unroll-vector-from-elements"; - } - StringRef getDescription() const final { - return "Test unrolling patterns for from_elements ops"; - } - void getDependentDialects(DialectRegistry ®istry) const override { - 
registry.insert(); - } - - void runOnOperation() override { - RewritePatternSet patterns(&getContext()); - populateVectorFromElementsLoweringPatterns(patterns); - (void)applyPatternsGreedily(getOperation(), std::move(patterns)); - } -}; - -struct TestUnrollVectorToElements - : public PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestUnrollVectorToElements) - - StringRef getArgument() const final { - return "test-unroll-vector-to-elements"; - } - StringRef getDescription() const final { - return "Test unrolling patterns for to_elements ops"; - } - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - - void runOnOperation() override { - RewritePatternSet patterns(&getContext()); - populateVectorToElementsLoweringPatterns(patterns); - (void)applyPatternsGreedily(getOperation(), std::move(patterns)); - } -}; - struct TestFoldArithExtensionIntoVectorContractPatterns : public PassWrapper> { @@ -1071,10 +1027,6 @@ void registerTestVectorLowerings() { PassRegistration(); - PassRegistration(); - - PassRegistration(); - PassRegistration(); PassRegistration(); From e4124c04799a53b663a58938292a7c123ee21556 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Mon, 15 Sep 2025 07:38:03 +0000 Subject: [PATCH 305/734] [mlir][Bazel] Add missing dependency after 48babe193186248e --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 66cb7956c89f2..ffa4a2effbfd1 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6074,6 +6074,7 @@ cc_library( ":LLVMDialect", ":NVVMOpsIncGen", ":NVVMRequiresSMTraitsIncGen", + ":PtrDialect", ":SideEffectInterfaces", ":Support", ":ToLLVMIRTranslation", From 78bf682cb9033cf6a5bbc733e062c7b7d825fdaf Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 15 Sep 2025 10:09:46 
+0200 Subject: [PATCH 306/734] Revert "[HIP][Clang] Remove __AMDGCN_WAVEFRONT_SIZE macros" (#158566) Reverts llvm/llvm-project#157463 The PR breaks buildbots with old ROCm versions, so revert it and reapply when buildbots are updated. --- clang/docs/AMDGPUSupport.rst | 4 + clang/docs/HIPSupport.rst | 3 +- clang/lib/Basic/Targets/AMDGPU.cpp | 6 + .../CodeGenHIP/maybe_undef-attr-verify.hip | 2 +- .../CodeGenOpenCL/builtins-amdgcn-wave32.cl | 6 +- .../CodeGenOpenCL/builtins-amdgcn-wave64.cl | 4 + clang/test/Driver/amdgpu-macros.cl | 16 +++ clang/test/Driver/hip-macros.hip | 23 ++++ ...wavefront-size-deprecation-diagnostics.hip | 115 ++++++++++++++++++ .../Preprocessor/predefined-arch-macros.c | 2 + 10 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst index 18e3de8abe92a..3eada5f900613 100644 --- a/clang/docs/AMDGPUSupport.rst +++ b/clang/docs/AMDGPUSupport.rst @@ -49,6 +49,10 @@ Predefined Macros - Defined as 1 if the CU mode is enabled and 0 if the WGP mode is enabled. * - ``__AMDGCN_UNSAFE_FP_ATOMICS__`` - Defined if unsafe floating-point atomics are allowed. + * - ``__AMDGCN_WAVEFRONT_SIZE__`` + - Defines the wavefront size. Allowed values are 32 and 64 (deprecated). + * - ``__AMDGCN_WAVEFRONT_SIZE`` + - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated). * - ``__HAS_FMAF__`` - Defined if FMAF instruction is available (deprecated). * - ``__HAS_LDEXPF__`` diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst index 0d04b842af025..b4a671e3cfa3c 100644 --- a/clang/docs/HIPSupport.rst +++ b/clang/docs/HIPSupport.rst @@ -178,7 +178,8 @@ Predefined Macros - Alias to ``__HIP_API_PER_THREAD_DEFAULT_STREAM__``. Deprecated. Note that some architecture specific AMDGPU macros will have default values when -used from the HIP host compilation. +used from the HIP host compilation. 
Other :doc:`AMDGPU macros ` +like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example. Compilation Modes ================= diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 443dfbc93a182..87de9e6865e71 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -356,6 +356,12 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, if (hasFastFMA()) Builder.defineMacro("FP_FAST_FMA"); + Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize), + "compile-time-constant access to the wavefront size will " + "be removed in a future release"); + Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize), + "compile-time-constant access to the wavefront size will " + "be removed in a future release"); Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); } diff --git a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip index 6dc57c4fcc5fc..571fba148f5cc 100644 --- a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip +++ b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip @@ -20,7 +20,7 @@ #define __maybe_undef __attribute__((maybe_undef)) #define WARP_SIZE 64 -static constexpr int warpSize = WARP_SIZE; +static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE__; __device__ static inline unsigned int __lane_id() { return __builtin_amdgcn_mbcnt_hi( diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index 31fd0e7bceaf5..d390418523694 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown 
-D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck -enable-var-scope %s // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s @@ -48,3 +48,7 @@ void test_read_exec_lo(global uint* out) { void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); } + +#if __AMDGCN_WAVEFRONT_SIZE != 32 +#error Wrong wavesize detected +#endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 758b5aa532d73..d851ec7e6734f 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -50,3 +50,7 @@ void test_read_exec_lo(global ulong* out) { void test_read_exec_hi(global ulong* out) { *out = __builtin_amdgcn_read_exec_hi(); } + +#if defined(__AMDGCN_WAVEFRONT_SIZE__) && __AMDGCN_WAVEFRONT_SIZE__ != 64 +#error Wrong wavesize detected +#endif diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index dd6fcc773a32b..a60593f2ab9ed 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -153,10 +153,26 @@ // ARCH-GCN-DAG: #define __[[CPU]]__ 1 // ARCH-GCN-DAG: #define __[[FAMILY]]__ 1 // ARCH-GCN-DAG: #define __amdgcn_processor__ "[[CPU]]" +// ARCH-GCN-DAG: #define __AMDGCN_WAVEFRONT_SIZE [[WAVEFRONT_SIZE]] // ARCH-GCN-DAG: #define __GCC_DESTRUCTIVE_SIZE 128 // ARCH-GCN-DAG: #define __GCC_CONSTRUCTIVE_SIZE 128 // UNSAFEFPATOMIC-DAG: #define __AMDGCN_UNSAFE_FP_ATOMICS__ 1 +// RUN: %clang -E -dM -target amdgcn 
-mcpu=gfx906 -mwavefrontsize64 \ +// RUN: %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ +// RUN: %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE32 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mno-wavefrontsize64 \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 + // RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 \ // RUN: %s 2>&1 | FileCheck --check-prefix=CUMODE-ON %s // RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mcumode \ diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip index 4c460d50bf39a..516e01a6c4743 100644 --- a/clang/test/Driver/hip-macros.hip +++ b/clang/test/Driver/hip-macros.hip @@ -1,4 +1,27 @@ // REQUIRES: amdgpu-registered-target +// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: 
-mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE32 %s +// RUN: %clang -E -dM --offload-arch=gfx906 -mno-wavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 64 +// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 32 +// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 + // RUN: %clang -E -dM --offload-arch=gfx906 --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefix=CUMODE-ON %s // RUN: %clang -E -dM --offload-arch=gfx906 --cuda-device-only -nogpuinc -nogpulib -mcumode \ diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip new file mode 100644 index 0000000000000..8a60f5a150048 --- /dev/null +++ b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip @@ -0,0 +1,115 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s +// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s + +// Test that deprecation warnings for the wavefront size macro are emitted properly. 
+ +#define WRAPPED __AMDGCN_WAVEFRONT_SIZE__ + +#define DOUBLE_WRAPPED (WRAPPED) + +template struct my_enable_if {}; + +template struct my_enable_if { + typedef T type; +}; + +__attribute__((host, device)) void use(int, const char*); + +template __attribute__((host, device)) int templatify(int x) { + return x + N; +} + +__attribute__((device)) const int GlobalConst = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +constexpr int GlobalConstExpr = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + +#if defined(__HIP_DEVICE_COMPILE__) && (__AMDGCN_WAVEFRONT_SIZE__ == 64) // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +int foo(void); +#endif + +__attribute__((device)) int device_var = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + +__attribute__((device)) +void device_fun() { + use(__AMDGCN_WAVEFRONT_SIZE, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} + use(__AMDGCN_WAVEFRONT_SIZE__, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(WRAPPED, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(DOUBLE_WRAPPED, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(GlobalConst, "device function"); + use(GlobalConstExpr, "device function"); +} + +__attribute__((global)) +void global_fun() { + // no warnings expected + use(__AMDGCN_WAVEFRONT_SIZE, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as 
deprecated}} + use(__AMDGCN_WAVEFRONT_SIZE__, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(WRAPPED, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(DOUBLE_WRAPPED, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +} + +int host_var = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +int host_var_alt = __AMDGCN_WAVEFRONT_SIZE; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} +int host_var_wrapped = WRAPPED; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +int host_var_double_wrapped = DOUBLE_WRAPPED; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + +__attribute__((host)) +void host_fun() { + use(__AMDGCN_WAVEFRONT_SIZE, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} + use(__AMDGCN_WAVEFRONT_SIZE__, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(WRAPPED, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(DOUBLE_WRAPPED, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(GlobalConst, "host function"); + use(GlobalConstExpr, "host function"); +} + +__attribute((host, device)) +void host_device_fun() { + use(__AMDGCN_WAVEFRONT_SIZE__, "host device 
function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(WRAPPED, "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(DOUBLE_WRAPPED, "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +} + +template // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +class FunSelector { +public: + template + __attribute__((device)) + auto fun(void) + -> typename my_enable_if<(FunWarpSize <= __AMDGCN_WAVEFRONT_SIZE__), void>::type // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + { + use(1, "yay!"); + } + + template + __attribute__((device)) + auto fun(void) + -> typename my_enable_if<(FunWarpSize > __AMDGCN_WAVEFRONT_SIZE__), void>::type // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + { + use(0, "nay!"); + } +}; + +__attribute__((device)) +void device_fun_selector_user() { + FunSelector<> f; + f.fun<>(); + f.fun<1>(); + f.fun<1000>(); + + my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type x = 42; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} +} + +__attribute__((device)) my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type DeviceFunTemplateRet(void) { // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + return 42; +} + +__attribute__((device)) int DeviceFunTemplateArg(my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type x) { // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} + return x; +} + +// expected-note@* 0+ {{macro marked 'deprecated' here}} diff --git 
a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index ebdfc8b79e063..ecddf130a5c51 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4410,6 +4410,7 @@ // CHECK_AMDGCN_NONE-NOT: #define __HAS_FMAF__ // CHECK_AMDGCN_NONE-NOT: #define __HAS_FP64__ // CHECK_AMDGCN_NONE-NOT: #define __HAS_LDEXPF__ +// CHECK_AMDGCN_NONE-NOT: #define __AMDGCN_WAVEFRONT_SIZE__ // Begin r600 tests ---------------- @@ -4430,6 +4431,7 @@ // RUN: %clang -x hip -E -dM %s -o - 2>&1 --offload-host-only -nogpulib \ // RUN: -nogpuinc --offload-arch=gfx803 -target x86_64-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_HIP_HOST +// CHECK_HIP_HOST: #define __AMDGCN_WAVEFRONT_SIZE__ 64 // CHECK_HIP_HOST: #define __AMDGPU__ 1 // CHECK_HIP_HOST: #define __AMD__ 1 From e85926545e6313fd3c5c6147e82db42132c389ac Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 15 Sep 2025 09:20:58 +0100 Subject: [PATCH 307/734] [libcxx][CI] Use lld for everything in the ARM picolib builds (#158320) Our host compiler is a clang install that will default to ld if not told otherwise. We were telling meson to use lld, but the way that we did it was outdated, which lead to picolib producing a linker script that lld could not use. The tests were in fact linking with ld instead. Using the `c_ld` setting fixes this problem. See: https://mesonbuild.com/Machine-files.html#binaries Then to use lld in tests we need `-fuse-ld=lld` in the config files. Some of these options were not needed for clang 19.1.7, but were for clang 21.1.1. We will soon update to 21.1.1 so I have included all of the required options in this PR. 
--- libcxx/cmake/caches/Armv7M-picolibc.cmake | 1 + libcxx/test/configs/armv7m-picolibc-libc++.cfg.in | 2 +- libcxx/utils/ci/build-picolibc.sh | 2 +- libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in | 2 +- libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libcxx/cmake/caches/Armv7M-picolibc.cmake b/libcxx/cmake/caches/Armv7M-picolibc.cmake index 0f8189b457285..9df71fba2cadd 100644 --- a/libcxx/cmake/caches/Armv7M-picolibc.cmake +++ b/libcxx/cmake/caches/Armv7M-picolibc.cmake @@ -5,6 +5,7 @@ set(CMAKE_C_COMPILER_TARGET "armv7m-none-eabi" CACHE STRING "") set(CMAKE_C_FLAGS "-mfloat-abi=soft" CACHE STRING "") set(CMAKE_SYSTEM_NAME Generic CACHE STRING "") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY CACHE STRING "") +set(LLVM_USE_LINKER "lld" CACHE STRING "") set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "") set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "") set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "") diff --git a/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in b/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in index 9bff5021494ef..b2669a713e2c0 100644 --- a/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in +++ b/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in @@ -13,7 +13,7 @@ config.substitutions.append(('%{compile_flags}', ' -Wno-atomic-alignment' )) config.substitutions.append(('%{link_flags}', - '-nostdlib -nostdlib++ -L %{lib-dir} -lc++ -lc++abi' + '-fuse-ld=lld -nostdlib -nostdlib++ -L %{lib-dir} -lc++ -lc++abi' ' -lc -lm -lclang_rt.builtins -lsemihost -lcrt0-semihost' + ' -T {}'.format(libc_linker_script) + ' -Wl,--defsym=__flash=0x0' diff --git a/libcxx/utils/ci/build-picolibc.sh b/libcxx/utils/ci/build-picolibc.sh index 521c1bef9fc7e..4be768d741230 100755 --- a/libcxx/utils/ci/build-picolibc.sh +++ b/libcxx/utils/ci/build-picolibc.sh @@ -81,7 +81,7 @@ cat < "${picolibc_build_dir}/meson-cross-build.txt" c = ['${CC:-cc}', '--target=${target}', '-mfloat-abi=soft', 
'-nostdlib'] ar = 'llvm-ar' as = 'llvm-as' -ld = 'lld' +c_ld = 'lld' strip = 'llvm-strip' [host_machine] system = 'none' diff --git a/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in b/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in index b4744f935ad85..0594ba4ce89b7 100644 --- a/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in +++ b/libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in @@ -8,7 +8,7 @@ config.substitutions.append(('%{compile_flags}', '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS' )) config.substitutions.append(('%{link_flags}', - '-nostdlib -nostdlib++ -L %{lib} -lc++ -lc++abi' + '-fuse-ld=lld -nostdlib -nostdlib++ -L %{lib} -lc++ -lc++abi' ' -lc -lm -lclang_rt.builtins -lsemihost -lcrt0-semihost' + ' -T {}'.format(libc_linker_script) + ' -Wl,--defsym=__flash=0x0' diff --git a/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in b/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in index e8f68a51fc53f..fc54900e1e0a1 100644 --- a/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in +++ b/libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in @@ -8,7 +8,7 @@ config.substitutions.append(('%{compile_flags}', '-nostdinc++ -I %{include}' )) config.substitutions.append(('%{link_flags}', - '-nostdlib -nostdlib++ -L %{lib} -lunwind' + '-fuse-ld=lld -nostdlib -nostdlib++ -L %{lib} -lunwind' ' -lc -lm -lclang_rt.builtins -lsemihost -lcrt0-semihost' + ' -T {}'.format(libc_linker_script) + ' -Wl,--defsym=__flash=0x0' From 757bb36a58c7d7151a28c6a5fc7caa2e1f44de87 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 15 Sep 2025 08:48:04 +0000 Subject: [PATCH 308/734] [lldb][test] Disable a test from TestDAP_cancel.py on Windows Flakey on our Windows on Arm bot: https://lab.llvm.org/buildbot/#/builders/141/builds/11516 See https://github.com/llvm/llvm-project/issues/137660 --- 
lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py b/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py index 109f34ff10a5d..9dea325694f00 100644 --- a/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py +++ b/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py @@ -70,6 +70,7 @@ def test_pending_request(self): self.assertEqual(cancel_resp["success"], True) self.continue_to_exit() + @skipIfWindows # https://github.com/llvm/llvm-project/issues/137660 def test_inflight_request(self): """ Tests cancelling an inflight request. From fd4ef8e601fe1ea52162f63815192540f3660f39 Mon Sep 17 00:00:00 2001 From: Mahesh-Attarde Date: Mon, 15 Sep 2025 14:33:14 +0530 Subject: [PATCH 309/734] [X86][GlobalIsel] Support G_INTRINSIC_TRUNC/G_FCEIL/G_FFLOOR (#156633) This PR adds support for C/CPP Lib Intrinsic G_INTRINSIC_TRUNC/G_FCEIL/G_FFLOOR from LangRef in GlobalIsel. --- .../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 11 +++-- llvm/test/CodeGen/X86/isel-ceil.ll | 49 ++++++++++++++++--- llvm/test/CodeGen/X86/isel-floor.ll | 49 ++++++++++++++++--- llvm/test/CodeGen/X86/isel-ftrunc.ll | 49 ++++++++++++++++--- 4 files changed, 135 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 7fe58539cd4ec..2c752457d165e 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -98,10 +98,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .widenScalarToNextPow2(0, /*Min=*/8) .clampScalar(0, s8, sMaxScalar); - getActionDefinitionsBuilder( - {G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, - G_FASIN, G_FTAN, G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, - G_FEXP2, G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS}) + getActionDefinitionsBuilder({G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, + G_FSIN, 
G_FSINH, G_FASIN, G_FTAN, G_FTANH, + G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, + G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, + G_FSINCOS, G_FCEIL, G_FFLOOR}) .libcall(); getActionDefinitionsBuilder(G_FSQRT) @@ -580,7 +581,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .lower(); // fp intrinsics - getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN) + getActionDefinitionsBuilder({G_INTRINSIC_ROUNDEVEN, G_INTRINSIC_TRUNC}) .scalarize(0) .minScalar(0, LLT::scalar(32)) .libcall(); diff --git a/llvm/test/CodeGen/X86/isel-ceil.ll b/llvm/test/CodeGen/X86/isel-ceil.ll index c82cfebd4814d..21df3f1160003 100644 --- a/llvm/test/CodeGen/X86/isel-ceil.ll +++ b/llvm/test/CodeGen/X86/isel-ceil.ll @@ -3,8 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 ; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64 -; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 define float @ceil_f32(float %a) nounwind readnone { ; DAG-X64-LABEL: ceil_f32: @@ -29,7 +29,19 @@ define float @ceil_f32(float %a) nounwind readnone { ; ; GISEL-X64-LABEL: ceil_f32: ; GISEL-X64: # %bb.0: -; GISEL-X64-NEXT: jmp ceilf@PLT # TAILCALL +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq ceilf +; GISEL-X64-NEXT: popq %rax +; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: ceil_f32: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; 
GISEL-X86-NEXT: calll ceilf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call float @llvm.ceil.f32(float %a) ret float %c } @@ -57,7 +69,24 @@ define double @ceil_f64(double %a) nounwind readnone { ; ; GISEL-X64-LABEL: ceil_f64: ; GISEL-X64: # %bb.0: -; GISEL-X64-NEXT: jmp ceil@PLT # TAILCALL +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq ceil +; GISEL-X64-NEXT: popq %rax +; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: ceil_f64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl 4(%eax), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: addl %esp, %edx +; GISEL-X86-NEXT: movl %ecx, (%esp) +; GISEL-X86-NEXT: movl %eax, 4(%edx) +; GISEL-X86-NEXT: calll ceil +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call double @llvm.ceil.f64(double %a) ret double %c } @@ -86,10 +115,18 @@ define x86_fp80 @ceil_f80(x86_fp80 %a) nounwind readnone { ; GISEL-X64-NEXT: subq $24, %rsp ; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) ; GISEL-X64-NEXT: fstpt (%rsp) -; GISEL-X64-NEXT: callq ceill@PLT +; GISEL-X64-NEXT: callq ceill ; GISEL-X64-NEXT: addq $24, %rsp ; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: ceil_f80: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fstpt (%esp) +; GISEL-X86-NEXT: calll ceill +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call x86_fp80 @llvm.ceil.f80(x86_fp80 %a) ret x86_fp80 %c } - diff --git a/llvm/test/CodeGen/X86/isel-floor.ll b/llvm/test/CodeGen/X86/isel-floor.ll index 675925b611263..66eeee89169ba 100644 --- a/llvm/test/CodeGen/X86/isel-floor.ll +++ b/llvm/test/CodeGen/X86/isel-floor.ll @@ -3,8 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 ; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s 
-mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64 -; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 define float @floor_f32(float %a) nounwind readnone { ; DAG-X64-LABEL: floor_f32: @@ -29,7 +29,19 @@ define float @floor_f32(float %a) nounwind readnone { ; ; GISEL-X64-LABEL: floor_f32: ; GISEL-X64: # %bb.0: -; GISEL-X64-NEXT: jmp floorf@PLT # TAILCALL +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq floorf +; GISEL-X64-NEXT: popq %rax +; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: floor_f32: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: calll floorf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call float @llvm.floor.f32(float %a) ret float %c } @@ -57,7 +69,24 @@ define double @floor_f64(double %a) nounwind readnone { ; ; GISEL-X64-LABEL: floor_f64: ; GISEL-X64: # %bb.0: -; GISEL-X64-NEXT: jmp floor@PLT # TAILCALL +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq floor +; GISEL-X64-NEXT: popq %rax +; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: floor_f64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl 4(%eax), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: addl %esp, %edx +; GISEL-X86-NEXT: movl %ecx, (%esp) +; GISEL-X86-NEXT: movl %eax, 4(%edx) +; GISEL-X86-NEXT: calll floor +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call double 
@llvm.floor.f64(double %a) ret double %c } @@ -86,10 +115,18 @@ define x86_fp80 @floor_f80(x86_fp80 %a) nounwind readnone { ; GISEL-X64-NEXT: subq $24, %rsp ; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) ; GISEL-X64-NEXT: fstpt (%rsp) -; GISEL-X64-NEXT: callq floorl@PLT +; GISEL-X64-NEXT: callq floorl ; GISEL-X64-NEXT: addq $24, %rsp ; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: floor_f80: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fstpt (%esp) +; GISEL-X86-NEXT: calll floorl +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call x86_fp80 @llvm.floor.f80(x86_fp80 %a) ret x86_fp80 %c } - diff --git a/llvm/test/CodeGen/X86/isel-ftrunc.ll b/llvm/test/CodeGen/X86/isel-ftrunc.ll index 9bf06193961a3..dcdb016d29aca 100644 --- a/llvm/test/CodeGen/X86/isel-ftrunc.ll +++ b/llvm/test/CodeGen/X86/isel-ftrunc.ll @@ -3,8 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 ; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64 -; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 define float @trunc_f32(float %a) nounwind readnone { ; DAG-X64-LABEL: trunc_f32: @@ -29,7 +29,19 @@ define float @trunc_f32(float %a) nounwind readnone { ; ; GISEL-X64-LABEL: trunc_f32: ; GISEL-X64: # %bb.0: -; GISEL-X64-NEXT: jmp truncf@PLT # TAILCALL +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq truncf +; GISEL-X64-NEXT: popq %rax +; 
GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: trunc_f32: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: calll truncf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call float @llvm.trunc.f32(float %a) ret float %c } @@ -57,7 +69,24 @@ define double @trunc_f64(double %a) nounwind readnone { ; ; GISEL-X64-LABEL: trunc_f64: ; GISEL-X64: # %bb.0: -; GISEL-X64-NEXT: jmp trunc@PLT # TAILCALL +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq trunc +; GISEL-X64-NEXT: popq %rax +; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: trunc_f64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl 4(%eax), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: addl %esp, %edx +; GISEL-X86-NEXT: movl %ecx, (%esp) +; GISEL-X86-NEXT: movl %eax, 4(%edx) +; GISEL-X86-NEXT: calll trunc +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call double @llvm.trunc.f64(double %a) ret double %c } @@ -86,10 +115,18 @@ define x86_fp80 @trunc_f80(x86_fp80 %a) nounwind readnone { ; GISEL-X64-NEXT: subq $24, %rsp ; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) ; GISEL-X64-NEXT: fstpt (%rsp) -; GISEL-X64-NEXT: callq truncl@PLT +; GISEL-X64-NEXT: callq truncl ; GISEL-X64-NEXT: addq $24, %rsp ; GISEL-X64-NEXT: retq +; +; GISEL-X86-LABEL: trunc_f80: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fstpt (%esp) +; GISEL-X86-NEXT: calll truncl +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl %c = call x86_fp80 @llvm.trunc.f80(x86_fp80 %a) ret x86_fp80 %c } - From 4de9bee7e53eb84a22511317e26dfd656b66df8b Mon Sep 17 00:00:00 2001 From: nerix Date: Mon, 15 Sep 2025 11:07:42 +0200 Subject: [PATCH 310/734] [LLDB][PDB] Require Windows for for testing PDB plugin-selection again 
(#158559) Amends #158284 and fixes the failure on `lldb-remote-linux-win` from https://github.com/llvm/llvm-project/pull/158284#issuecomment-3290154510. That builder is configured with the DIA SDK but builds for Linux, so the debug information will be DWARF, not PDB. --- lldb/test/Shell/SymbolFile/PDB/native-setting.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp b/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp index edf7508b88f17..a3077252f08f1 100644 --- a/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp +++ b/lldb/test/Shell/SymbolFile/PDB/native-setting.cpp @@ -1,4 +1,4 @@ -// REQUIRES: diasdk +// REQUIRES: diasdk, target-windows // Test plugin.symbol-file.pdb.reader setting // RUN: %build -o %t.exe -- %s From d8c8c67dc5e1ac7f3a551750b9e21ec64ec5b283 Mon Sep 17 00:00:00 2001 From: Mahesh-Attarde Date: Mon, 15 Sep 2025 14:37:59 +0530 Subject: [PATCH 311/734] [X86][GlobalIsel] Add G_[U|S]MIN/G_[U|S]MAX scalar test coverage (#157621) Adds isel test for supporting G_[U|S]MIN/G_[U|S]MAX. 
--- llvm/test/CodeGen/X86/isel-smax.ll | 189 +++++++++++++++++++++++++++++ llvm/test/CodeGen/X86/isel-smin.ll | 189 +++++++++++++++++++++++++++++ llvm/test/CodeGen/X86/isel-umax.ll | 189 +++++++++++++++++++++++++++++ llvm/test/CodeGen/X86/isel-umin.ll | 189 +++++++++++++++++++++++++++++ 4 files changed, 756 insertions(+) create mode 100644 llvm/test/CodeGen/X86/isel-smax.ll create mode 100644 llvm/test/CodeGen/X86/isel-smin.ll create mode 100644 llvm/test/CodeGen/X86/isel-umax.ll create mode 100644 llvm/test/CodeGen/X86/isel-umin.ll diff --git a/llvm/test/CodeGen/X86/isel-smax.ll b/llvm/test/CodeGen/X86/isel-smax.ll new file mode 100644 index 0000000000000..9c9a48e3a1b3e --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-smax.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 + +define i8 @smax_i8(i8 %a, i8 %b) nounwind readnone { +; X64-LABEL: smax_i8: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmovgl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: smax_i8: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movzbl %dil, %ecx +; FASTISEL-X64-NEXT: movzbl %sil, %eax +; FASTISEL-X64-NEXT: cmpb %al, %cl +; FASTISEL-X64-NEXT: cmovgl %ecx, %eax +; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax +; FASTISEL-X64-NEXT: retq +; +; 
X86-LABEL: smax_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpb %cl, %al +; X86-NEXT: jg .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB0_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smax_i8: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpb %cl, %al +; FASTISEL-X86-NEXT: jg .LBB0_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB0_2: +; FASTISEL-X86-NEXT: retl + %ret = call i8 @llvm.smax.i8(i8 %a, i8 %b) + ret i8 %ret +} +define i16 @smax_i16(i16 %a, i16 %b) nounwind readnone { +; X64-LABEL: smax_i16: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmovgl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: smax_i16: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; FASTISEL-X64-NEXT: cmpw %ax, %di +; FASTISEL-X64-NEXT: cmovgl %edi, %eax +; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: smax_i16: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpw %cx, %ax +; X86-NEXT: jg .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB1_2: +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smax_i16: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpw %cx, %ax +; FASTISEL-X86-NEXT: jg .LBB1_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB1_2: +; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X86-NEXT: retl + %ret = call i16 @llvm.smax.i16(i16 %a, i16 %b) 
+ ret i16 %ret +} +define i32 @smax_i32(i32 %a, i32 %b) nounwind readnone { +; X64-LABEL: smax_i32: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: cmovgl %edi, %eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: smax_i32: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; FASTISEL-X64-NEXT: cmpl %esi, %edi +; FASTISEL-X64-NEXT: cmovgl %edi, %eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: smax_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jg .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB2_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smax_i32: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %ecx, %eax +; FASTISEL-X86-NEXT: jg .LBB2_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB2_2: +; FASTISEL-X86-NEXT: retl + %ret = call i32 @llvm.smax.i32(i32 %a, i32 %b) + ret i32 %ret +} +define i64 @smax_i64(i64 %a, i64 %b) nounwind readnone { +; X64-LABEL: smax_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: cmpq %rsi, %rdi +; X64-NEXT: cmovgq %rdi, %rax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: smax_i64: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movq %rsi, %rax +; FASTISEL-X64-NEXT: cmpq %rsi, %rdi +; FASTISEL-X64-NEXT: cmovgq %rdi, %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: smax_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: jl .LBB3_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: 
.LBB3_2: +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smax_i64: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: pushl %edi +; FASTISEL-X86-NEXT: pushl %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %eax, %esi +; FASTISEL-X86-NEXT: movl %ecx, %edi +; FASTISEL-X86-NEXT: sbbl %edx, %edi +; FASTISEL-X86-NEXT: jl .LBB3_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %esi, %eax +; FASTISEL-X86-NEXT: movl %ecx, %edx +; FASTISEL-X86-NEXT: .LBB3_2: +; FASTISEL-X86-NEXT: popl %esi +; FASTISEL-X86-NEXT: popl %edi +; FASTISEL-X86-NEXT: retl + %ret = call i64 @llvm.smax.i64(i64 %a, i64 %b) + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-smin.ll b/llvm/test/CodeGen/X86/isel-smin.ll new file mode 100644 index 0000000000000..7349a7c6a06f3 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-smin.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 + +define i8 @smin_i8(i8 %a, i8 %b) nounwind readnone { +; X64-LABEL: smin_i8: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmovll %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; 
FASTISEL-X64-LABEL: smin_i8: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movzbl %dil, %ecx +; FASTISEL-X64-NEXT: movzbl %sil, %eax +; FASTISEL-X64-NEXT: cmpb %al, %cl +; FASTISEL-X64-NEXT: cmovll %ecx, %eax +; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: smin_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpb %cl, %al +; X86-NEXT: jl .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB0_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smin_i8: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpb %cl, %al +; FASTISEL-X86-NEXT: jl .LBB0_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB0_2: +; FASTISEL-X86-NEXT: retl + %ret = call i8 @llvm.smin.i8(i8 %a, i8 %b) + ret i8 %ret +} +define i16 @smin_i16(i16 %a, i16 %b) nounwind readnone { +; X64-LABEL: smin_i16: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmovll %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: smin_i16: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; FASTISEL-X64-NEXT: cmpw %ax, %di +; FASTISEL-X64-NEXT: cmovll %edi, %eax +; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: smin_i16: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpw %cx, %ax +; X86-NEXT: jl .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB1_2: +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smin_i16: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax 
+; FASTISEL-X86-NEXT: cmpw %cx, %ax +; FASTISEL-X86-NEXT: jl .LBB1_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB1_2: +; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X86-NEXT: retl + %ret = call i16 @llvm.smin.i16(i16 %a, i16 %b) + ret i16 %ret +} +define i32 @smin_i32(i32 %a, i32 %b) nounwind readnone { +; X64-LABEL: smin_i32: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: cmovll %edi, %eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: smin_i32: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; FASTISEL-X64-NEXT: cmpl %esi, %edi +; FASTISEL-X64-NEXT: cmovll %edi, %eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: smin_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jl .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB2_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smin_i32: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %ecx, %eax +; FASTISEL-X86-NEXT: jl .LBB2_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB2_2: +; FASTISEL-X86-NEXT: retl + %ret = call i32 @llvm.smin.i32(i32 %a, i32 %b) + ret i32 %ret +} +define i64 @smin_i64(i64 %a, i64 %b) nounwind readnone { +; X64-LABEL: smin_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: cmpq %rsi, %rdi +; X64-NEXT: cmovlq %rdi, %rax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: smin_i64: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movq %rsi, %rax +; FASTISEL-X64-NEXT: cmpq %rsi, %rdi +; FASTISEL-X64-NEXT: cmovlq %rdi, %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: smin_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: jl .LBB3_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB3_2: +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: smin_i64: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: pushl %edi +; FASTISEL-X86-NEXT: pushl %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %esi, %eax +; FASTISEL-X86-NEXT: movl %edx, %edi +; FASTISEL-X86-NEXT: sbbl %ecx, %edi +; FASTISEL-X86-NEXT: jl .LBB3_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %esi, %eax +; FASTISEL-X86-NEXT: movl %ecx, %edx +; FASTISEL-X86-NEXT: .LBB3_2: +; FASTISEL-X86-NEXT: popl %esi +; FASTISEL-X86-NEXT: popl %edi +; FASTISEL-X86-NEXT: retl + %ret = call i64 @llvm.smin.i64(i64 %a, i64 %b) + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-umax.ll b/llvm/test/CodeGen/X86/isel-umax.ll new file mode 100644 index 0000000000000..a90456cdbebb1 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-umax.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel 
-global-isel-abort=2 | FileCheck %s --check-prefixes=X86 + +define i8 @umax_i8(i8 %a, i8 %b) nounwind readnone { +; X64-LABEL: umax_i8: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmoval %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umax_i8: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movzbl %dil, %ecx +; FASTISEL-X64-NEXT: movzbl %sil, %eax +; FASTISEL-X64-NEXT: cmpb %al, %cl +; FASTISEL-X64-NEXT: cmoval %ecx, %eax +; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umax_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpb %cl, %al +; X86-NEXT: ja .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB0_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umax_i8: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpb %cl, %al +; FASTISEL-X86-NEXT: ja .LBB0_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB0_2: +; FASTISEL-X86-NEXT: retl + %ret = call i8 @llvm.umax.i8(i8 %a, i8 %b) + ret i8 %ret +} +define i16 @umax_i16(i16 %a, i16 %b) nounwind readnone { +; X64-LABEL: umax_i16: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmoval %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umax_i16: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; FASTISEL-X64-NEXT: cmpw %ax, %di +; FASTISEL-X64-NEXT: cmoval %edi, %eax +; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umax_i16: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpw %cx, %ax +; X86-NEXT: ja 
.LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB1_2: +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umax_i16: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpw %cx, %ax +; FASTISEL-X86-NEXT: ja .LBB1_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB1_2: +; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X86-NEXT: retl + %ret = call i16 @llvm.umax.i16(i16 %a, i16 %b) + ret i16 %ret +} +define i32 @umax_i32(i32 %a, i32 %b) nounwind readnone { +; X64-LABEL: umax_i32: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: cmoval %edi, %eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umax_i32: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; FASTISEL-X64-NEXT: cmpl %esi, %edi +; FASTISEL-X64-NEXT: cmoval %edi, %eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umax_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: ja .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB2_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umax_i32: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %ecx, %eax +; FASTISEL-X86-NEXT: ja .LBB2_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB2_2: +; FASTISEL-X86-NEXT: retl + %ret = call i32 @llvm.umax.i32(i32 %a, i32 %b) + ret i32 %ret +} +define i64 @umax_i64(i64 %a, i64 %b) nounwind readnone { +; X64-LABEL: umax_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: cmpq %rsi, %rdi +; X64-NEXT: cmovaq %rdi, %rax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umax_i64: +; 
FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movq %rsi, %rax +; FASTISEL-X64-NEXT: cmpq %rsi, %rdi +; FASTISEL-X64-NEXT: cmovaq %rdi, %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umax_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: jb .LBB3_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB3_2: +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umax_i64: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: pushl %edi +; FASTISEL-X86-NEXT: pushl %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %eax, %esi +; FASTISEL-X86-NEXT: movl %ecx, %edi +; FASTISEL-X86-NEXT: sbbl %edx, %edi +; FASTISEL-X86-NEXT: jb .LBB3_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %esi, %eax +; FASTISEL-X86-NEXT: movl %ecx, %edx +; FASTISEL-X86-NEXT: .LBB3_2: +; FASTISEL-X86-NEXT: popl %esi +; FASTISEL-X86-NEXT: popl %edi +; FASTISEL-X86-NEXT: retl + %ret = call i64 @llvm.umax.i64(i64 %a, i64 %b) + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/isel-umin.ll b/llvm/test/CodeGen/X86/isel-umin.ll new file mode 100644 index 0000000000000..53a0b277e6d7b --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-umin.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64 +; RUN: llc < %s 
-mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 + +define i8 @umin_i8(i8 %a, i8 %b) nounwind readnone { +; X64-LABEL: umin_i8: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmovbl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umin_i8: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movzbl %dil, %ecx +; FASTISEL-X64-NEXT: movzbl %sil, %eax +; FASTISEL-X64-NEXT: cmpb %al, %cl +; FASTISEL-X64-NEXT: cmovbl %ecx, %eax +; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umin_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpb %cl, %al +; X86-NEXT: jb .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB0_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umin_i8: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpb %cl, %al +; FASTISEL-X86-NEXT: jb .LBB0_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB0_2: +; FASTISEL-X86-NEXT: retl + %ret = call i8 @llvm.umin.i8(i8 %a, i8 %b) + ret i8 %ret +} +define i16 @umin_i16(i16 %a, i16 %b) nounwind readnone { +; X64-LABEL: umin_i16: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmovbl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umin_i16: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; 
FASTISEL-X64-NEXT: cmpw %ax, %di +; FASTISEL-X64-NEXT: cmovbl %edi, %eax +; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umin_i16: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpw %cx, %ax +; X86-NEXT: jb .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB1_2: +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umin_i16: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpw %cx, %ax +; FASTISEL-X86-NEXT: jb .LBB1_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB1_2: +; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; FASTISEL-X86-NEXT: retl + %ret = call i16 @llvm.umin.i16(i16 %a, i16 %b) + ret i16 %ret +} +define i32 @umin_i32(i32 %a, i32 %b) nounwind readnone { +; X64-LABEL: umin_i32: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: cmovbl %edi, %eax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umin_i32: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movl %esi, %eax +; FASTISEL-X64-NEXT: cmpl %esi, %edi +; FASTISEL-X64-NEXT: cmovbl %edi, %eax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umin_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jb .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB2_2: +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umin_i32: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %ecx, %eax +; FASTISEL-X86-NEXT: jb .LBB2_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %ecx, %eax +; FASTISEL-X86-NEXT: .LBB2_2: +; 
FASTISEL-X86-NEXT: retl + %ret = call i32 @llvm.umin.i32(i32 %a, i32 %b) + ret i32 %ret +} +define i64 @umin_i64(i64 %a, i64 %b) nounwind readnone { +; X64-LABEL: umin_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: cmpq %rsi, %rdi +; X64-NEXT: cmovbq %rdi, %rax +; X64-NEXT: retq +; +; FASTISEL-X64-LABEL: umin_i64: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: movq %rsi, %rax +; FASTISEL-X64-NEXT: cmpq %rsi, %rdi +; FASTISEL-X64-NEXT: cmovbq %rdi, %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: umin_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: jb .LBB3_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB3_2: +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; FASTISEL-X86-LABEL: umin_i64: +; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: pushl %edi +; FASTISEL-X86-NEXT: pushl %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; FASTISEL-X86-NEXT: cmpl %esi, %eax +; FASTISEL-X86-NEXT: movl %edx, %edi +; FASTISEL-X86-NEXT: sbbl %ecx, %edi +; FASTISEL-X86-NEXT: jb .LBB3_2 +; FASTISEL-X86-NEXT: # %bb.1: +; FASTISEL-X86-NEXT: movl %esi, %eax +; FASTISEL-X86-NEXT: movl %ecx, %edx +; FASTISEL-X86-NEXT: .LBB3_2: +; FASTISEL-X86-NEXT: popl %esi +; FASTISEL-X86-NEXT: popl %edi +; FASTISEL-X86-NEXT: retl + %ret = call i64 @llvm.umin.i64(i64 %a, i64 %b) + ret i64 %ret +} From 50bcf6818e045a39eb21201f7c512e514476385e Mon Sep 17 00:00:00 2001 From: Brandon <61314499+brandonxin@users.noreply.github.com> Date: Mon, 15 Sep 2025 04:21:05 -0500 Subject: [PATCH 
312/734] [X86][bytecode] Allow SSE/AVX BLEND imm intrinsics to be used in constexpr (#157776) This marks the following builtins as constexpr, which allows their corresponding intrinsics to be used in constexprs. | Intrinsics | X86 Builtins | CPUID Flags | Header | | -------------------- | --------------------------- | ----------- | ----------- | | `_mm_blend_pd` | `__builtin_ia32_blendpd` | SSE4.1 | smmintrin.h | | `_mm256_blend_pd` | `__builtin_ia32_blendpd256` | AVX | immintrin.h | | `_mm_blend_ps` | `__builtin_ia32_blendps` | SSE4.1 | smmintrin.h | | `_mm256_blend_ps` | `__builtin_ia32_blendps256` | AVX | immintrin.h | | `_mm_blend_epi16` | `__builtin_ia32_pblendw128` | SSE4.1 | smmintrin.h | | `_mm256_blend_epi16` | `__builtin_ia32_pblendw256` | AVX2 | immintrin.h | | `_mm_blend_epi32` | `__builtin_ia32_pblendd128` | AVX2 | immintrin.h | | `_mm256_blend_epi32` | `__builtin_ia32_pblendd256` | AVX2 | immintrin.h | Fixes #157065 --------- Co-authored-by: Timm Baeder Co-authored-by: Simon Pilgrim --- clang/include/clang/Basic/BuiltinsX86.td | 17 ++++----- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 44 ++++++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 27 +++++++++++++++ clang/test/CodeGen/X86/avx-builtins.c | 8 +++++ clang/test/CodeGen/X86/avx2-builtins.c | 12 +++++++ clang/test/CodeGen/X86/sse41-builtins.c | 13 ++++++- 6 files changed, 112 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 1a8645f99e281..dd7727a39f693 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -312,9 +312,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; - def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Vector<8, short>, _Constant int)">; - def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; - def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; @@ -333,6 +330,9 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] } let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; + def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; + def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">; @@ -469,8 +469,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">; def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; - def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">; - def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, 
double>, _Vector<4, double>, _Constant int)">; def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; @@ -495,6 +493,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in } let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">; + def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">; def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">; } @@ -575,7 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; - def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -604,8 +603,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long 
long int>)">; - def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; - def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; @@ -619,6 +616,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; + def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; + def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; + def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; + def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">; def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4461731c25648..40b9e04aa335c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2837,6 +2837,40 @@ static bool interp__builtin_select(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_blend(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + PrimType MaskT = *S.getContext().classify(Call->getArg(2)); + APSInt Mask = popToAPSInt(S.Stk, MaskT); + const Pointer &TrueVec = S.Stk.pop(); + const Pointer &FalseVec = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + assert(FalseVec.getNumElems() == 
TrueVec.getNumElems()); + assert(FalseVec.getNumElems() == Dst.getNumElems()); + unsigned NumElems = FalseVec.getNumElems(); + PrimType ElemT = FalseVec.getFieldDesc()->getPrimType(); + PrimType DstElemT = Dst.getFieldDesc()->getPrimType(); + + for (unsigned I = 0; I != NumElems; ++I) { + bool MaskBit = Mask[I % 8]; + if (ElemT == PT_Float) { + assert(DstElemT == PT_Float); + Dst.elem(I) = + MaskBit ? TrueVec.elem(I) : FalseVec.elem(I); + } else { + assert(DstElemT == ElemT); + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + Dst.elem(I) = + static_cast(MaskBit ? TrueVec.elem(I).toAPSInt() + : FalseVec.elem(I).toAPSInt()); + }); + } + } + Dst.initializeAllElements(); + + return true; +} + static bool interp__builtin_elementwise_triop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref @@ -3502,6 +3536,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return llvm::APIntOps::fshr(Hi, Lo, Amt); }); + case clang::X86::BI__builtin_ia32_blendpd: + case clang::X86::BI__builtin_ia32_blendpd256: + case clang::X86::BI__builtin_ia32_blendps: + case clang::X86::BI__builtin_ia32_blendps256: + case clang::X86::BI__builtin_ia32_pblendw128: + case clang::X86::BI__builtin_ia32_pblendw256: + case clang::X86::BI__builtin_ia32_pblendd128: + case clang::X86::BI__builtin_ia32_pblendd256: + return interp__builtin_blend(S, OpPC, Call); + case clang::X86::BI__builtin_ia32_blendvpd: case clang::X86::BI__builtin_ia32_blendvpd256: case clang::X86::BI__builtin_ia32_blendvps: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 820b053057067..77dc2203576b3 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11926,6 +11926,33 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_blendpd: + case X86::BI__builtin_ia32_blendpd256: + case X86::BI__builtin_ia32_blendps: + case 
X86::BI__builtin_ia32_blendps256: + case X86::BI__builtin_ia32_pblendw128: + case X86::BI__builtin_ia32_pblendw256: + case X86::BI__builtin_ia32_pblendd128: + case X86::BI__builtin_ia32_pblendd256: { + APValue SourceF, SourceT, SourceC; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceF) || + !EvaluateAsRValue(Info, E->getArg(1), SourceT) || + !EvaluateAsRValue(Info, E->getArg(2), SourceC)) + return false; + + const APInt &C = SourceC.getInt(); + unsigned SourceLen = SourceF.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(SourceLen); + for (unsigned EltNum = 0; EltNum != SourceLen; ++EltNum) { + const APValue &F = SourceF.getVectorElt(EltNum); + const APValue &T = SourceT.getVectorElt(EltNum); + ResultElements.push_back(C[EltNum % 8] ? T : F); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: case X86::BI__builtin_ia32_blendvps: diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 8223ab2b52cac..7b1a9cc4d9a7f 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -87,12 +87,20 @@ __m256d test_mm256_blend_pd(__m256d A, __m256d B) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> return _mm256_blend_pd(A, B, 0x05); } +TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x00), 1.0, 2.0, 3.0, 4.0)); +TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x05), 5.0, 2.0, 7.0, 4.0)); +TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x0A), 1.0, 6.0, 3.0, 8.0)); +TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x0F), 5.0, 6.0, 7.0, 8.0)); __m256 test_mm256_blend_ps(__m256 A, __m256 B) { // 
CHECK-LABEL: test_mm256_blend_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> return _mm256_blend_ps(A, B, 0x35); } +TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0x00), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f)); +TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0x35), -1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, 8.0f)); +TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0xAA), 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f)); +TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0xFF), -1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f)); __m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) { // CHECK-LABEL: test_mm256_blendv_pd diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index aeb1aee4ea946..17ab47c72ad4b 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -146,6 +146,10 @@ __m256i test_mm256_blend_epi16(__m256i a, __m256i b) { // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> return _mm256_blend_epi16(a, b, 2); } +TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x00), 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)); +TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), 
((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x5A), 1,-2,3,-4,-5,6,-7,8,9,-10,11,-12,-13,14,-15,16)); +TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x94), 1,2,-3,4,-5,6,7,-8,9,10,-11,12,-13,14,15,-16)); +TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0xFF), -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16)); __m128i test_mm_blend_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_blend_epi32 @@ -153,6 +157,10 @@ __m128i test_mm_blend_epi32(__m128i a, __m128i b) { // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> return _mm_blend_epi32(a, b, 0x05); } +TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0x0), 1,2,3,4)); +TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0x5), -1,2,-3,4)); +TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0xA), 1,-2,3,-4)); +TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0xF), -1,-2,-3,-4)); __m256i test_mm256_blend_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_blend_epi32 @@ -160,6 +168,10 @@ __m256i test_mm256_blend_epi32(__m256i a, __m256i b) { // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> return _mm256_blend_epi32(a, b, 0x35); } +TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0x00), 1,2,3,4,5,6,7,8)); +TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0xA5), 
-1,2,-3,4,5,-6,7,-8)); +TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0x94), 1,2,-3,4,-5,6,7,-8)); +TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0xFF), -1,-2,-3,-4,-5,-6,-7,-8)); __m256i test_mm256_blendv_epi8(__m256i a, __m256i b, __m256i m) { // CHECK-LABEL: test_mm256_blendv_epi8 diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index dca161c8038a2..c7265b188d572 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -27,18 +27,30 @@ __m128i test_mm_blend_epi16(__m128i V1, __m128i V2) { // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> return _mm_blend_epi16(V1, V2, 42); } +TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x00),1,2,3,4,5,6,7,8)); +TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x5A),1,-2,3,-4,-5,6,-7,8)); +TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x94),1,2,-3,4,-5,6,7,-8)); +TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0xFF),-1,-2,-3,-4,-5,-6,-7,-8)); __m128d test_mm_blend_pd(__m128d V1, __m128d V2) { // CHECK-LABEL: test_mm_blend_pd // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> return _mm_blend_pd(V1, V2, 2); } +TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 0), 1.0, 2.0)); +TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 1), 3.0, 2.0)); +TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 2), 1.0, 4.0)); 
+TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 3), 3.0, 4.0)); __m128 test_mm_blend_ps(__m128 V1, __m128 V2) { // CHECK-LABEL: test_mm_blend_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_blend_ps(V1, V2, 6); } +TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0x0), 1.0f, 2.0f, 3.0f, 4.0f)); +TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0x5), 5.0f, 2.0f, 7.0f, 4.0f)); +TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0xA), 1.0f, 6.0f, 3.0f, 8.0f)); +TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0xF), 5.0f, 6.0f, 7.0f, 8.0f)); __m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) { // CHECK-LABEL: test_mm_blendv_epi8 @@ -459,4 +471,3 @@ int test_mm_testz_si128(__m128i x, __m128i y) { // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_testz_si128(x, y); } - From de0094edf0c8596550ed58d1b43e10969631a5ab Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 15 Sep 2025 10:38:31 +0100 Subject: [PATCH 313/734] [mlir][tosa] Introduce accumulator type for `reduce_sum` on bf16 (#158389) TOSA requires that `reduce_sum` operations on bf16 accumulate into fp32. This change updates the `linalg` legalization by introducing an explicit accumulator type to ensure compliance with the specification. 
--------- Signed-off-by: Georgios Pinitas --- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 63 ++++++++++++++----- .../TosaToLinalg/tosa-to-linalg.mlir | 26 ++++++++ 2 files changed, 74 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index e2b31f640da2f..0a6f2477560a1 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1160,6 +1160,12 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, auto elementTy = resultTy.getElementType(); Value input = op->getOperand(0); + // Figure out the accType if needed + bool widenAccTy = std::is_same_v && + isa(elementTy) && + cast(elementTy).isBF16(); + Type accTy = widenAccTy ? rewriter.getF32Type() : elementTy; + SmallVector reduceShape; SmallVector dynDims; for (unsigned i = 0; i < inputTy.getRank(); i++) { @@ -1174,11 +1180,11 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, inputs.push_back(input); // First fill the output buffer with the init value. - auto emptyTensor = tensor::EmptyOp::create(rewriter, loc, reduceShape, - resultTy.getElementType(), dynDims) - .getResult(); + auto emptyTensor = + tensor::EmptyOp::create(rewriter, loc, reduceShape, accTy, dynDims) + .getResult(); - auto fillValueAttr = createInitialValueForReduceOp(op, elementTy, rewriter); + auto fillValueAttr = createInitialValueForReduceOp(op, accTy, rewriter); if (!fillValueAttr) return rewriter.notifyMatchFailure( op, "No initial value found for reduction operation"); @@ -1231,8 +1237,14 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { std::array binaryArgs{ blockArgs[0], isNanIgnoreMode ? 
blockArgs[2] : blockArgs[1]}; - auto result = createLinalgBodyCalculationForReduceOp( - op, binaryArgs, elementTy, rewriter); + + // If reduction type differs then extend (applicable to reduce_sum) + if (binaryArgs[0].getType() != accTy) + binaryArgs[0] = arith::ExtFOp::create(nestedBuilder, nestedLoc, accTy, + binaryArgs[0]); + + auto result = createLinalgBodyCalculationForReduceOp(op, binaryArgs, + accTy, rewriter); if (result) didEncounterError = true; @@ -1273,12 +1285,11 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, // Create a tensor full of NaNs. auto nanValueAttr = rewriter.getFloatAttr( - elementTy, + accTy, APFloat::getNaN(cast(elementTy).getFloatSemantics(), false)); auto nanValue = arith::ConstantOp::create(rewriter, loc, nanValueAttr); auto emptyNanTensor = - tensor::EmptyOp::create(rewriter, loc, reduceShape, - resultTy.getElementType(), dynDims) + tensor::EmptyOp::create(rewriter, loc, reduceShape, accTy, dynDims) .getResult(); auto nanFilledTensor = linalg::FillOp::create(rewriter, loc, ValueRange{nanValue}, @@ -1288,8 +1299,7 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, // Create an empty tensor, non need to fill this since it will be // overwritten by the select. 
auto finalEmptyTensor = - tensor::EmptyOp::create(rewriter, loc, reduceShape, - resultTy.getElementType(), dynDims) + tensor::EmptyOp::create(rewriter, loc, reduceShape, accTy, dynDims) .getResult(); // Do a selection between the tensors akin to: @@ -1304,9 +1314,32 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, linalgOp = linalgSelect; } + // Truncate back to resultTy if needed + Value reducedRes = linalgOp->getResult(0); + if (widenAccTy) { + auto resEmptyOp = + tensor::EmptyOp::create(rewriter, loc, reduceShape, elementTy, dynDims) + .getResult(); + + const unsigned reducedRank = + cast(reducedRes.getType()).getRank(); + auto identityMap = rewriter.getMultiDimIdentityMap(reducedRank); + reducedRes = + linalg::GenericOp::create( + rewriter, loc, resEmptyOp.getType(), ValueRange{reducedRes}, + ValueRange{resEmptyOp}, + ArrayRef{identityMap, identityMap}, + getNParallelLoopsAttrs(reducedRank), + [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { + Value truncf = arith::TruncFOp::create(nestedBuilder, nestedLoc, + elementTy, args[0]); + linalg::YieldOp::create(nestedBuilder, nestedLoc, truncf); + }) + .getResults()[0]; + } + SmallVector reassociationMap; - uint64_t expandInputRank = - cast(linalgOp->getResults()[0].getType()).getRank(); + uint64_t expandInputRank = cast(reducedRes.getType()).getRank(); reassociationMap.resize(expandInputRank); for (uint64_t i = 0; i < expandInputRank; i++) { @@ -1324,8 +1357,8 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, // since here we know which dimension to expand, and `tosa::ReshapeOp` would // not have access to such information. This matters when handling dynamically // sized tensors. 
- rewriter.replaceOpWithNewOp( - op, resultTy, linalgOp->getResults()[0], reassociationMap); + rewriter.replaceOpWithNewOp(op, resultTy, reducedRes, + reassociationMap); return success(); } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 3fc513f823a1a..37af8b8859852 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -912,6 +912,32 @@ func.func @test_identity(%arg0: tensor<1xf32>, %arg1: tensor<1xi32>) -> (tensor< // ----- +// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)> +// CHECK-LABEL: @reduce_bf16 +// CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xbf16> +func.func @reduce_bf16(%arg0: tensor<5x4xbf16>) -> () { + // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32> + // CHECK: [[CST0:%.+]] = arith.constant 0.0 + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xbf16>) outs([[FILL]] : tensor<4xf32>) dimensions = [0] + // CHECK: (%[[ARG1:.*]]: bf16, %[[ARG2:.*]]: f32) { + // CHECK: [[EXTF:%.+]] = arith.extf %[[ARG1]] : bf16 to f32 + // CHECK: [[ACC:%.+]] = arith.addf [[EXTF]], %[[ARG2]] : f32 + // CHECK: linalg.yield [[ACC]] : f32 + // CHECK: } + // CHECK: [[INIT_RES:%.+]] = tensor.empty() : tensor<4xbf16> + // CHECK: [[RES:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[REDUCE]] : tensor<4xf32>) outs([[INIT_RES]] : tensor<4xbf16>) + // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: bf16): + // CHECK: [[TRUNCF:%.+]] = arith.truncf %[[IN]] : f32 to bf16 + // CHECK: linalg.yield [[TRUNCF]] : bf16 + // CHECK: } + // CHECK: tensor.expand_shape [[RES]] {{\[}}[0, 1]] output_shape [1, 4] : tensor<4xbf16> into tensor<1x4xbf16> + %0 = tosa.reduce_sum %arg0 {axis = 0 : i32} : (tensor<5x4xbf16>) -> tensor<1x4xbf16> + return +} + +// ----- + // CHECK-LABEL: @reduce_float // CHECK-SAME: 
[[ARG0:%.+]]: tensor<5x4xf32> func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () { From 273917e5c0d935af5736770bdc9d0b03ce04dd8c Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 15 Sep 2025 10:39:07 +0100 Subject: [PATCH 314/734] [AArch64] Update and extend some GlobalMerge tests. NFC --- .../CodeGen/AArch64/aarch64-tail-dup-size.ll | 30 +++++++-- .../CodeGen/AArch64/global-merge-external.ll | 28 ++++++++ .../CodeGen/AArch64/global-merge-minsize.ll | 14 ++-- .../AArch64/local-bounds-single-trap.ll | 67 +++++++++++-------- 4 files changed, 100 insertions(+), 39 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/global-merge-external.ll diff --git a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll index be07404f4b2fc..f37c942ab950c 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=aarch64-none-linux -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2 ; RUN: llc -mtriple=aarch64-none-linux -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2 -; RUN: llc -mtriple=aarch64-none-linux -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3 +; RUN: llc -mtriple=aarch64-none-linux -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O2-6 %a = type { ptr, i32, %b } %b = type { %c } @@ -29,7 +29,7 @@ define dso_local void @testcase(ptr nocapture %arg){ ; CHECK-O2-NEXT: .LBB0_3: // %if.end ; CHECK-O2-NEXT: adrp x9, global_int ; CHECK-O2-NEXT: add x2, x8, #16 -; CHECK-O2-NEXT: mov w0, #10 +; CHECK-O2-NEXT: mov w0, #10 // =0xa ; CHECK-O2-NEXT: ldr w1, [x9, :lo12:global_int] ; CHECK-O2-NEXT: b externalfunc ; @@ -44,16 +44,38 @@ define dso_local void @testcase(ptr nocapture %arg){ ; CHECK-O3-NEXT: ldr x8, [x8, :lo12:global_ptr] ; CHECK-O3-NEXT: adrp x9, global_int ; CHECK-O3-NEXT: add x2, x8, #16 -; CHECK-O3-NEXT: mov w0, #10 +; 
CHECK-O3-NEXT: mov w0, #10 // =0xa ; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int] ; CHECK-O3-NEXT: b externalfunc ; CHECK-O3-NEXT: .LBB0_2: ; CHECK-O3-NEXT: mov x8, xzr ; CHECK-O3-NEXT: adrp x9, global_int ; CHECK-O3-NEXT: add x2, x8, #16 -; CHECK-O3-NEXT: mov w0, #10 +; CHECK-O3-NEXT: mov w0, #10 // =0xa ; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int] ; CHECK-O3-NEXT: b externalfunc +; +; CHECK-O2-6-LABEL: testcase: +; CHECK-O2-6: // %bb.0: // %entry +; CHECK-O2-6-NEXT: adrp x8, global_ptr +; CHECK-O2-6-NEXT: ldr x9, [x8, :lo12:global_ptr] +; CHECK-O2-6-NEXT: cbz x9, .LBB0_2 +; CHECK-O2-6-NEXT: // %bb.1: // %if.then +; CHECK-O2-6-NEXT: ldr x9, [x9] +; CHECK-O2-6-NEXT: str x9, [x0] +; CHECK-O2-6-NEXT: ldr x8, [x8, :lo12:global_ptr] +; CHECK-O2-6-NEXT: adrp x9, global_int +; CHECK-O2-6-NEXT: add x2, x8, #16 +; CHECK-O2-6-NEXT: mov w0, #10 // =0xa +; CHECK-O2-6-NEXT: ldr w1, [x9, :lo12:global_int] +; CHECK-O2-6-NEXT: b externalfunc +; CHECK-O2-6-NEXT: .LBB0_2: +; CHECK-O2-6-NEXT: mov x8, xzr +; CHECK-O2-6-NEXT: adrp x9, global_int +; CHECK-O2-6-NEXT: add x2, x8, #16 +; CHECK-O2-6-NEXT: mov w0, #10 // =0xa +; CHECK-O2-6-NEXT: ldr w1, [x9, :lo12:global_int] +; CHECK-O2-6-NEXT: b externalfunc entry: %0 = load ptr, ptr @global_ptr, align 8 %cmp.not = icmp eq ptr %0, null diff --git a/llvm/test/CodeGen/AArch64/global-merge-external.ll b/llvm/test/CodeGen/AArch64/global-merge-external.ll new file mode 100644 index 0000000000000..fb3753c54e0ca --- /dev/null +++ b/llvm/test/CodeGen/AArch64/global-merge-external.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-O2 +; RUN: llc -O3 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-O3 + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +@global0 = dso_local local_unnamed_addr global i32 0, align 4 
+@global1 = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local i32 @func() { +; CHECK-LABEL: func: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, global0 +; CHECK-NEXT: adrp x9, global1 +; CHECK-NEXT: ldr w8, [x8, :lo12:global0] +; CHECK-NEXT: ldr w9, [x9, :lo12:global1] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret +entry: + %0 = load i32, ptr @global0, align 4 + %1 = load i32, ptr @global1, align 4 + %add = add nsw i32 %1, %0 + ret i32 %add +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-O2: {{.*}} +; CHECK-O3: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/global-merge-minsize.ll b/llvm/test/CodeGen/AArch64/global-merge-minsize.ll index 8f569ecd9e634..f952580ba4540 100644 --- a/llvm/test/CodeGen/AArch64/global-merge-minsize.ll +++ b/llvm/test/CodeGen/AArch64/global-merge-minsize.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -7,12 +8,13 @@ target triple = "aarch64" @global1 = dso_local local_unnamed_addr global i32 0, align 4 define dso_local i32 @func() minsize optsize { -; CHECK-LABEL: @func -; CHECK: adrp x8, .L_MergedGlobals -; CHECK-NEXT: add x8, x8, :lo12:.L_MergedGlobals -; CHECK-NEXT: ldp w9, w8, [x8] -; CHECK-NEXT: add w0, w8, w9 -; CHECK-NEXT: ret +; CHECK-LABEL: func: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .L_MergedGlobals +; CHECK-NEXT: add x8, x8, :lo12:.L_MergedGlobals +; CHECK-NEXT: ldp w9, w8, [x8] +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: ret entry: %0 = load i32, ptr @global0, align 4 %1 = load i32, ptr @global1, align 4 diff --git a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll index 8b8a3e430df69..caf6f1a83f762 100644 --- 
a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll +++ b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -O3 -mtriple arm64-linux -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-ASM ; This test checks that nomerge correctly prevents the traps from being merged ; in the compiled code. @@ -9,36 +10,44 @@ ; Function Attrs: noinline nounwind uwtable define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 { +; CHECK-ASM-LABEL: f8: +; CHECK-ASM: // %bb.0: // %entry +; CHECK-ASM-NEXT: sub sp, sp, #16 +; CHECK-ASM-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ASM-NEXT: .cfi_remember_state +; CHECK-ASM-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-ASM-NEXT: sxtw x8, w0 +; CHECK-ASM-NEXT: stp w1, w0, [sp, #8] +; CHECK-ASM-NEXT: cmp x8, #10 +; CHECK-ASM-NEXT: b.hi .LBB0_5 +; CHECK-ASM-NEXT: // %bb.1: // %entry +; CHECK-ASM-NEXT: mov w9, #10 // =0xa +; CHECK-ASM-NEXT: sub x9, x9, x8 +; CHECK-ASM-NEXT: cbz x9, .LBB0_5 +; CHECK-ASM-NEXT: // %bb.2: +; CHECK-ASM-NEXT: ldrsw x9, [sp, #8] +; CHECK-ASM-NEXT: adrp x10, B +; CHECK-ASM-NEXT: add x10, x10, :lo12:B +; CHECK-ASM-NEXT: strb wzr, [x10, x8] +; CHECK-ASM-NEXT: cmp x9, #10 +; CHECK-ASM-NEXT: b.hi .LBB0_6 +; CHECK-ASM-NEXT: // %bb.3: +; CHECK-ASM-NEXT: mov w8, #10 // =0xa +; CHECK-ASM-NEXT: sub x8, x8, x9 +; CHECK-ASM-NEXT: cbz x8, .LBB0_6 +; CHECK-ASM-NEXT: // %bb.4: +; CHECK-ASM-NEXT: adrp x8, B2 +; CHECK-ASM-NEXT: add x8, x8, :lo12:B2 +; CHECK-ASM-NEXT: strb wzr, [x8, x9] +; CHECK-ASM-NEXT: add sp, sp, #16 +; CHECK-ASM-NEXT: .cfi_def_cfa_offset 0 +; CHECK-ASM-NEXT: ret +; CHECK-ASM-NEXT: .LBB0_5: // %trap +; CHECK-ASM-NEXT: .cfi_restore_state +; CHECK-ASM-NEXT: brk #0x1 +; CHECK-ASM-NEXT: .LBB0_6: // %trap3 +; CHECK-ASM-NEXT: brk #0x1 entry: -; CHECK-ASM: cmp x8, #10 -; CHECK-ASM: b.hi .LBB0_5 -; CHECK-ASM: // %bb.1: // %entry -; CHECK-ASM: mov w9, #10 // =0xa -; CHECK-ASM: sub 
x9, x9, x8 -; CHECK-ASM: cbz x9, .LBB0_5 -; CHECK-ASM: // %bb.2: -; CHECK-ASM: ldrsw x9, [sp, #8] -; CHECK-ASM: adrp x10, B -; CHECK-ASM: add x10, x10, :lo12:B -; CHECK-ASM: strb wzr, [x10, x8] -; CHECK-ASM: cmp x9, #10 -; CHECK-ASM: b.hi .LBB0_6 -; CHECK-ASM: // %bb.3: -; CHECK-ASM: mov w8, #10 // =0xa -; CHECK-ASM: sub x8, x8, x9 -; CHECK-ASM: cbz x8, .LBB0_6 -; CHECK-ASM: // %bb.4: -; CHECK-ASM: adrp x8, B2 -; CHECK-ASM: add x8, x8, :lo12:B2 -; CHECK-ASM: strb wzr, [x8, x9] -; CHECK-ASM: add sp, sp, #16 -; CHECK-ASM: .cfi_def_cfa_offset 0 -; CHECK-ASM: ret -; CHECK-ASM: .LBB0_5: // %trap -; CHECK-ASM: .cfi_restore_state -; CHECK-ASM: brk #0x1 -; CHECK-ASM: .LBB0_6: // %trap3 -; CHECK-ASM: brk #0x1 %i.addr = alloca i32, align 4 %k.addr = alloca i32, align 4 store i32 %i, ptr %i.addr, align 4 From 44061d14fb03ebbd38050c628ed009ae4db2714c Mon Sep 17 00:00:00 2001 From: Harsh Tiwary Date: Mon, 15 Sep 2025 02:40:27 -0700 Subject: [PATCH 315/734] [Headers][X86] Allow AVX512 masked blend intrinsics to be used in constexpr (#156234) This patch enables AVX-512 masked blend intrinsics to be usable in constant expressions (`constexpr`) across various vector widths (128-bit, 256-bit, 512-bit). It updates the respective Clang headers to include the `__DEFAULT_FN_ATTRS_CONSTEXPR` annotation where applicable, and supplements the change with thorough `TEST_CONSTEXPR` checks in the X86 CodeGen test suite to validate constexpr evaluation. Fixes #155796. 
--------- Co-authored-by: Simon Pilgrim --- clang/lib/Headers/avx512bwintrin.h | 10 +- clang/lib/Headers/avx512fintrin.h | 20 ++-- clang/lib/Headers/avx512fp16intrin.h | 2 +- clang/lib/Headers/avx512vlbwintrin.h | 20 ++-- clang/lib/Headers/avx512vlfp16intrin.h | 7 +- clang/lib/Headers/avx512vlintrin.h | 32 +++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 46 +++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 99 +++++++++++++++++++ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 33 +++++++ .../test/CodeGen/X86/avx512vlfp16-builtins.c | 43 ++++++++ 10 files changed, 261 insertions(+), 51 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 42fce7d89e1bb..77820a2ca041c 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -464,17 +464,15 @@ _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __W, (__v64qi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __W, (__v32hi) __A); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 7ba09039cd826..8ebfb75170e17 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3209,33 +3209,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, /* Vector Blend */ -static __inline __m512d __DEFAULT_FN_ATTRS512 
-_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, (__v8df) __W, (__v8df) __A); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, (__v16sf) __W, (__v16sf) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, (__v8di) __W, (__v8di) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, (__v16si) __W, (__v16si) __A); diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index d30b49e552e1b..4bd798129a25d 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -3309,7 +3309,7 @@ _mm512_reduce_min_ph(__m512h __V) { return __builtin_ia32_reduce_fmin_ph512(__V); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W, (__v32hf)__A); diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 6e3efa7b3562c..f1cd71af05ab5 100644 --- 
a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -452,33 +452,29 @@ _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __W, (__v16qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __W, (__v32qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __W, (__v8hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __W, (__v16hi) __A); diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index 8eb31eae6173b..ec766e31c6769 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -1995,14 +1995,13 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { (__v8sf)__C, (__mmask8)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, - __m128h __A, - 
__m128h __W) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W, (__v8hf)__A); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W, (__v16hf)__A); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index d85ea23d5ee5a..5f5a54e7284c1 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -1498,57 +1498,57 @@ _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __W, (__v4si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __W, (__v8si) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __W, (__v2df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR 
+_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __W, (__v4df) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __W, (__v4sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __W, (__v8sf) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __W, (__v2di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __W, (__v4di) __A); diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 9d605efcbd758..3be708aea8a4d 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -854,11 +854,57 @@ __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) { // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_blend_epi8(__U,__A,__W); } +TEST_CONSTEXPR(match_v64qi( + _mm512_mask_blend_epi8( + (__mmask64) 0x00000001, 
+ (__m512i)(__v64qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v64qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); __m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) { // CHECK-LABEL: test_mm512_mask_blend_epi16 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_blend_epi16(__U,__A,__W); } +TEST_CONSTEXPR(match_v32hi( + _mm512_mask_blend_epi16( + (__mmask32) 0x00000001, + (__m512i)(__v32hi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v32hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); + +__m512i test_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { + // CHECK-LABEL: test_mm512_mask_blend_epi32 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_mask_blend_epi32(__U, __A, __W); +} +TEST_CONSTEXPR(match_v16si( + _mm512_mask_blend_epi32( + (__mmask16) 0x0001, + (__m512i)(__v16si) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v16si){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); + +__m512i test_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { + // CHECK-LABEL: test_mm512_mask_blend_epi64 + // CHECK: select <8 x i1> 
%{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_mask_blend_epi64(__U, __A, __W); +} + +TEST_CONSTEXPR(match_v8di( + _mm512_mask_blend_epi64( + (__mmask8)0x01, + (__m512i)(__v8di){2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17} + ), + 10, 2, 2, 2, 2, 2, 2, 2 +)); + __m512i test_mm512_abs_epi8(__m512i __A) { // CHECK-LABEL: test_mm512_abs_epi8 // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 9daecd0d9875f..8cef11b12fb93 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -3622,41 +3622,140 @@ __m128i test_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_blend_epi32(__U,__A,__W); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_blend_epi32( + (__mmask8)0x01, + (__m128i)(__v4si){2, 2, 2, 2}, + (__m128i)(__v4si){ 10,11,12,13 } + ), + 10, 2, 2, 2 +)); __m256i test_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi32 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_blend_epi32(__U,__A,__W); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_blend_epi32( + (__mmask8)0x01, + (__m256i)(__v8si){2, 2, 2, 2, 2, 2, 2, 2}, + (__m256i)(__v8si){ 10,11,12,13,14,15,16,17 } + ), + 10, 2, 2, 2, 2, 2, 2, 2 +)); __m128d test_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) { // CHECK-LABEL: test_mm_mask_blend_pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_blend_pd(__U,__A,__W); } +TEST_CONSTEXPR(match_m128d( + _mm_mask_blend_pd( + (__mmask8)0x01, + (__m128d)(__v2df){2.0, 2.0}, + (__m128d)(__v2df){10.0, 20.0} + ), + 10.0, 2.0 +)); __m256d test_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) { // 
CHECK-LABEL: test_mm256_mask_blend_pd // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_blend_pd(__U,__A,__W); } +TEST_CONSTEXPR(match_m256d( + _mm256_mask_blend_pd( + (__mmask8)0x01, + (__m256d)(__v4df){2.0, 2.0, 2.0, 2.0}, + (__m256d)(__v4df){10.0, 11.0, 12.0, 13.0} + ), + 10.0, 2.0, 2.0, 2.0 +)); + +__m512d test_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { + // CHECK-LABEL: test_mm512_mask_blend_pd + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_blend_pd(__U, __A, __W); +} + +TEST_CONSTEXPR(match_m512d( + _mm512_mask_blend_pd( + (__mmask8)0x01, + (__m512d)(__v8df){2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0}, + (__m512d)(__v8df){10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0} + ), + 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 +)); + __m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) { // CHECK-LABEL: test_mm_mask_blend_ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_blend_ps(__U,__A,__W); } +TEST_CONSTEXPR(match_m128( + _mm_mask_blend_ps( + (__mmask8)0x01, + (__m128)(__v4sf){2.0f, 2.0f, 2.0f, 2.0f}, + (__m128)(__v4sf){10.0f, 11.0f, 12.0f, 13.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f +)); + __m256 test_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) { // CHECK-LABEL: test_mm256_mask_blend_ps // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_blend_ps(__U,__A,__W); } +TEST_CONSTEXPR(match_m256( + _mm256_mask_blend_ps( + (__mmask8)0x01, + (__m256)(__v8sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m256)(__v8sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); + +__m512 test_mm512_mask_blend_ps(__mmask8 __U, __m512 __A, __m512 __W) { + // CHECK-LABEL: test_mm512_mask_blend_ps + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return 
_mm512_mask_blend_ps(__U, __A, __W); +} +TEST_CONSTEXPR(match_m512( + _mm512_mask_blend_ps( + (__mmask16)0x01, + (__m512)(__v16sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m512)(__v16sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); + __m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) { // CHECK-LABEL: test_mm_mask_blend_epi64 // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_blend_epi64(__U,__A,__W); } +TEST_CONSTEXPR(match_v2di( + _mm_mask_blend_epi64( + (__mmask8)0x01, + (__m128i)(__v2di){2, 2}, + (__m128i)(__v2di){ 10,11 } + ), + 10, 2 +)); __m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi64 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_blend_epi64(__U,__A,__W); } +TEST_CONSTEXPR(match_v4di( + _mm256_mask_blend_epi64( + (__mmask8)0x01, + (__m256i)(__v4di){2, 2, 2, 2}, + (__m256i)(__v4di){ 10,11,12,13 } + ), + 10, 2, 2, 2 +)); __m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_compress_pd // CHECK: @llvm.x86.avx512.mask.compress diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d62235a630fd8..d8f9a3ace6f38 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -893,23 +893,56 @@ __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) { // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_blend_epi8(__U,__A,__W); } +TEST_CONSTEXPR(match_v16qi( + _mm_mask_blend_epi8( + (__mmask16)0x0001, + (__m128i)(__v16qi){2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2}, + (__m128i)(__v16qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 } + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); + __m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi8 // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_blend_epi8(__U,__A,__W); } +TEST_CONSTEXPR(match_v32qi( + _mm256_mask_blend_epi8( + (__mmask32) 0x00000001, + (__m256i)(__v32qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m256i)(__v32qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); __m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) { // CHECK-LABEL: test_mm_mask_blend_epi16 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_blend_epi16(__U,__A,__W); } +TEST_CONSTEXPR(match_v8hi( + _mm_mask_blend_epi16( + (__mmask8)0x01, + (__m128i)(__v8hi){2, 2, 2, 2, 2, 2, 2, 2}, + (__m128i)(__v8hi){ 10,11,12,13,14,15,16,17 } + ), + 10, 2, 2, 2, 2, 2, 2, 2 +)); __m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi16 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_blend_epi16(__U,__A,__W); } +TEST_CONSTEXPR(match_v16hi( + _mm256_mask_blend_epi16( + (__mmask16)0x0001, + (__m256i)(__v16hi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m256i)(__v16hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 } + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); __m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_abs_epi8 diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c 
index fd6ea8fe6056d..badfa301e429d 100644 --- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c @@ -3016,6 +3016,14 @@ __m128h test_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}} return _mm_mask_blend_ph(__U, __A, __W); } +TEST_CONSTEXPR(match_m128h( + _mm_mask_blend_ph( + (__mmask8)0x01, + (__m128h)(__v8hf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m128h)(__v8hf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); __m256h test_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { // CHECK-LABEL: test_mm256_mask_blend_ph @@ -3023,6 +3031,41 @@ __m256h test_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} return _mm256_mask_blend_ph(__U, __A, __W); } +TEST_CONSTEXPR(match_m256h( + _mm256_mask_blend_ph( + (__mmask16)0x0001, + (__m256h)(__v16hf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m256h)(__v16hf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); + +__m512h test_mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { + // CHECK-LABEL: test_mm512_mask_blend_ph + // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: %{{.*}} = select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}} + return _mm512_mask_blend_ph(__U, __A, __W); +} +TEST_CONSTEXPR(match_m512h( + _mm512_mask_blend_ph( + (__mmask32)0x00000001, + (__m512h)(__v32hf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m512h)(__v32hf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); __m128h test_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) { // CHECK-LABEL: test_mm_permutex2var_ph From d8c2607fb1f4094db18e7716764738f9bc8489df Mon Sep 17 00:00:00 2001 From: Ivan Murashko Date: Mon, 15 Sep 2025 10:55:59 +0100 Subject: [PATCH 316/734] [clang][Sema] Fix false positive -Wshadow with structured binding captures (#157667) Previously, lambda init captures of structured bindings were incorrectly classified as regular shadow warnings (shown with `-Wshadow`), while regular parameter captures were correctly classified as `uncaptured-local` warnings (shown only with `-Wshadow-all`). This created inconsistent behavior: ```cpp void foo1(std::pair val) { [val = std::move(val)](){}(); // No warning with -Wshadow (correct) } void foo2(std::pair val) { auto [a, b] = val; [a = std::move(a)](){}(); // Warning with -Wshadow (incorrect) } ``` The fix extends the existing lambda capture classification logic in `CheckShadow()` to handle `BindingDecl` consistently with `VarDecl`, ensuring both cases show no warnings with `-Wshadow` and `uncaptured-local` warnings with `-Wshadow-all`. Fixes #68605. 
--------- Co-authored-by: Mariya Podchishchaeva --- clang/docs/ReleaseNotes.rst | 5 ++ clang/lib/Sema/SemaDecl.cpp | 42 +++++++---- clang/test/SemaCXX/PR68605.cpp | 72 +++++++++++++++++++ clang/test/SemaCXX/warn-shadow-in-lambdas.cpp | 9 ++- 4 files changed, 111 insertions(+), 17 deletions(-) create mode 100644 clang/test/SemaCXX/PR68605.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 41bec2666f939..bdf8334f78cea 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -316,6 +316,11 @@ Bug Fixes in This Version - Builtin elementwise operators now accept vector arguments that have different qualifiers on their elements. For example, vector of 4 ``const float`` values and vector of 4 ``float`` values. (#GH155405) +- Fixed inconsistent shadow warnings for lambda capture of structured bindings. + Previously, ``[val = val]`` (regular parameter) produced no warnings with ``-Wshadow`` + while ``[a = a]`` (where ``a`` is from ``auto [a, b] = std::make_pair(1, 2)``) + incorrectly produced warnings. Both cases now consistently show no warnings with + ``-Wshadow`` and show uncaptured-local warnings with ``-Wshadow-all``. (#GH68605) - Fixed a failed assertion with a negative limit parameter value inside of ``__has_embed``. (#GH157842) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 2b0ddb584c37e..45cfb66996ce6 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8395,7 +8395,7 @@ static ShadowedDeclKind computeShadowedDeclKind(const NamedDecl *ShadowedDecl, /// Return the location of the capture if the given lambda captures the given /// variable \p VD, or an invalid source location otherwise. 
static SourceLocation getCaptureLocation(const LambdaScopeInfo *LSI, - const VarDecl *VD) { + const ValueDecl *VD) { for (const Capture &Capture : LSI->Captures) { if (Capture.isVariableCapture() && Capture.getVariable() == VD) return Capture.getLocation(); @@ -8492,7 +8492,9 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl, if (isa(D) && NewDC && isa(NewDC)) { if (const auto *RD = dyn_cast(NewDC->getParent())) { if (RD->isLambda() && OldDC->Encloses(NewDC->getLexicalParent())) { - if (const auto *VD = dyn_cast(ShadowedDecl)) { + // Handle both VarDecl and BindingDecl in lambda contexts + if (isa(ShadowedDecl)) { + const auto *VD = cast(ShadowedDecl); const auto *LSI = cast(getCurFunction()); if (RD->getLambdaCaptureDefault() == LCD_None) { // Try to avoid warnings for lambdas with an explicit capture @@ -8521,18 +8523,27 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl, return; } } - if (const auto *VD = dyn_cast(ShadowedDecl); - VD && VD->hasLocalStorage()) { - // A variable can't shadow a local variable in an enclosing scope, if - // they are separated by a non-capturing declaration context. - for (DeclContext *ParentDC = NewDC; - ParentDC && !ParentDC->Equals(OldDC); - ParentDC = getLambdaAwareParentOfDeclContext(ParentDC)) { - // Only block literals, captured statements, and lambda expressions - // can capture; other scopes don't. 
- if (!isa(ParentDC) && !isa(ParentDC) && - !isLambdaCallOperator(ParentDC)) { - return; + // Apply scoping logic to both VarDecl and BindingDecl with local storage + if (isa(ShadowedDecl)) { + bool HasLocalStorage = false; + if (const auto *VD = dyn_cast(ShadowedDecl)) + HasLocalStorage = VD->hasLocalStorage(); + else if (const auto *BD = dyn_cast(ShadowedDecl)) + HasLocalStorage = + cast(BD->getDecomposedDecl())->hasLocalStorage(); + + if (HasLocalStorage) { + // A variable can't shadow a local variable or binding in an enclosing + // scope, if they are separated by a non-capturing declaration + // context. + for (DeclContext *ParentDC = NewDC; + ParentDC && !ParentDC->Equals(OldDC); + ParentDC = getLambdaAwareParentOfDeclContext(ParentDC)) { + // Only block literals, captured statements, and lambda expressions + // can capture; other scopes don't. + if (!isa(ParentDC) && !isa(ParentDC) && + !isLambdaCallOperator(ParentDC)) + return; } } } @@ -8579,7 +8590,8 @@ void Sema::DiagnoseShadowingLambdaDecls(const LambdaScopeInfo *LSI) { const NamedDecl *ShadowedDecl = Shadow.ShadowedDecl; // Try to avoid the warning when the shadowed decl isn't captured. const DeclContext *OldDC = ShadowedDecl->getDeclContext(); - if (const auto *VD = dyn_cast(ShadowedDecl)) { + if (isa(ShadowedDecl)) { + const auto *VD = cast(ShadowedDecl); SourceLocation CaptureLoc = getCaptureLocation(LSI, VD); Diag(Shadow.VD->getLocation(), CaptureLoc.isInvalid() ? diag::warn_decl_shadow_uncaptured_local diff --git a/clang/test/SemaCXX/PR68605.cpp b/clang/test/SemaCXX/PR68605.cpp new file mode 100644 index 0000000000000..97eb858b77246 --- /dev/null +++ b/clang/test/SemaCXX/PR68605.cpp @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -verify -fsyntax-only -std=c++20 -Wshadow %s +// RUN: %clang_cc1 -verify=all -fsyntax-only -std=c++20 -Wshadow-all %s + +// Test for issue #68605: Inconsistent shadow warnings for lambda capture of structured bindings. 
+// +// The issue was that structured binding lambda captures were incorrectly classified +// as regular shadow warnings (shown with -Wshadow) while regular parameter captures +// were classified as uncaptured-local warnings (shown only with -Wshadow-all). +// +// This test validates that both VarDecl and BindingDecl lambda captures now +// behave consistently: no warnings with -Wshadow, but uncaptured-local warnings +// with -Wshadow-all. + +namespace std { + template T&& move(T&& t) { return static_cast(t); } +} + +namespace issue_68605 { + +// Simple pair-like struct for testing +struct Pair { + int first; + int second; + Pair(int f, int s) : first(f), second(s) {} +}; + +// Test case 1: Regular parameter - consistent behavior +void foo1(Pair val) { // all-note {{previous declaration is here}} + [val = std::move(val)](){}(); // all-warning {{declaration shadows a local variable}} +} + +// Test case 2: Structured binding - now consistent with regular parameter +void foo2(Pair val) { + auto [a,b] = val; // all-note {{previous declaration is here}} + [a = std::move(a)](){}(); // all-warning {{declaration shadows a structured binding}} +} + +// Test case 3: Multiple captures showing consistent behavior +void foo3() { + Pair data{42, 100}; + auto [id, value] = data; // all-note 2{{previous declaration is here}} + + // Both show consistent uncaptured-local warnings with -Wshadow-all + auto lambda1 = [id = id](){ return id; }; // all-warning {{declaration shadows a structured binding}} + auto lambda2 = [value = value](){ return value; }; // all-warning {{declaration shadows a structured binding}} +} + +// Test case 4: Mixed scenario showing consistent behavior +void foo4() { + int regular_var = 10; // all-note {{previous declaration is here}} + Pair pair_data{1, 2}; + auto [x, y] = pair_data; // all-note 2{{previous declaration is here}} + + // All captures now show consistent uncaptured-local warnings with -Wshadow-all + auto lambda1 = [regular_var = regular_var](){}; 
// all-warning {{declaration shadows a local variable}} + auto lambda2 = [x = x](){}; // all-warning {{declaration shadows a structured binding}} + auto lambda3 = [y = y](){}; // all-warning {{declaration shadows a structured binding}} +} + +// Test case 5: Ensure we don't break existing shadow detection for actual shadowing +void foo5() { + int outer = 5; // expected-note {{previous declaration is here}} all-note {{previous declaration is here}} + auto [a, b] = Pair{1, 2}; // expected-note {{previous declaration is here}} all-note {{previous declaration is here}} + + // This SHOULD still warn - it's actual shadowing within the lambda body + auto lambda = [outer, a](){ // expected-note {{variable 'outer' is explicitly captured here}} all-note {{variable 'outer' is explicitly captured here}} expected-note {{variable 'a' is explicitly captured here}} all-note {{variable 'a' is explicitly captured here}} + int outer = 10; // expected-warning {{declaration shadows a local variable}} all-warning {{declaration shadows a local variable}} + int a = 20; // expected-warning {{declaration shadows a structured binding}} all-warning {{declaration shadows a structured binding}} + }; +} + +} // namespace issue_68605 diff --git a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp index d54b394df4eb8..2388c5f16e4ca 100644 --- a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp +++ b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp @@ -258,10 +258,15 @@ struct S { }; int foo() { - auto [a] = S{0}; // expected-note {{previous}} \ - // cxx14-warning {{decomposition declarations are a C++17 extension}} +#ifdef AVOID + auto [a] = S{0}; // cxx14-warning {{decomposition declarations are a C++17 extension}} + [a = a] () { // No warning with basic -Wshadow due to uncaptured-local classification + }(); +#else + auto [a] = S{0}; // cxx14-warning {{decomposition declarations are a C++17 extension}} expected-note {{previous declaration is here}} [a = a] () { 
// expected-warning {{declaration shadows a structured binding}} }(); +#endif } } From 2c091e6aec2d48fbcafc9cc5909a62f0321db1fd Mon Sep 17 00:00:00 2001 From: macurtis-amd Date: Mon, 15 Sep 2025 05:03:02 -0500 Subject: [PATCH 317/734] AMDGPU: Report unaligned scratch access as fast if supported by tgt (#158036) This enables more consecutive load folding during aggressive-instcombine. The original motivating example provided by Jeff Byrnes: https://godbolt.org/z/8ebcTEjTs Example provided by Nikita Popov: https://godbolt.org/z/Gv1j4vjqE as part of my original attempt to fix the issue (PR [#133301](https://github.com/llvm/llvm-project/pull/133301), see his [comment](https://github.com/llvm/llvm-project/pull/133301#issuecomment-2984905809)). This changes the value of `IsFast` returned by `In SITargetLowering::allowsMisalignedMemoryAccessesImpl` to be non-zero for private and flat addresses if the subtarget supports unaligned scratch accesses. This enables aggressive-instcombine to do more folding of consecutive loads (see [here](https://github.com/llvm/llvm-project/blob/cbd496581fb6953a9a8d8387a010cc3a67d4654b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp#L811)). Summary performance impact on [composable_kernel](https://github.com/ROCm/composable_kernel): |GPU|speedup (geomean*)| |---|---| |MI300A| 1.11| |MI300X| 1.14| |MI350X| 1.03| [*] Just to be clear, this is the geomean across kernels which were impacted by this change - not across all CK kernels. 
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 +- .../test/CodeGen/AMDGPU/memcpy-fixed-align.ll | 16 +- llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll | 152 +++-- .../AMDGPU/memcpy-param-combinations.ll | 558 ++++++++---------- .../AMDGPU/memmove-param-combinations.ll | 64 +- .../AMDGPU/fold-consecutive-loads.ll | 234 ++++++++ .../AMDGPU/lit.local.cfg | 2 + 7 files changed, 594 insertions(+), 440 deletions(-) create mode 100644 llvm/test/Transforms/AggressiveInstCombine/AMDGPU/fold-consecutive-loads.ll create mode 100644 llvm/test/Transforms/AggressiveInstCombine/AMDGPU/lit.local.cfg diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3332723b038f5..9acc4b6de3501 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2098,10 +2098,16 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || AddrSpace == AMDGPUAS::FLAT_ADDRESS) { bool AlignedBy4 = Alignment >= Align(4); + if (Subtarget->hasUnalignedScratchAccessEnabled()) { + if (IsFast) + *IsFast = AlignedBy4 ? 
Size : 1; + return true; + } + if (IsFast) *IsFast = AlignedBy4; - return AlignedBy4 || Subtarget->hasUnalignedScratchAccessEnabled(); + return AlignedBy4; } // So long as they are correct, wide global memory operations perform better diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll index 37a261cab7563..e8bd640aa5409 100644 --- a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll +++ b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll @@ -7,23 +7,25 @@ define void @memcpy_fixed_align(ptr addrspace(5) %dst, ptr addrspace(1) %src) { ; MUBUF-LABEL: memcpy_fixed_align: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: global_load_dwordx2 v[11:12], v[1:2], off offset:32 ; MUBUF-NEXT: global_load_dwordx4 v[3:6], v[1:2], off ; MUBUF-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 +; MUBUF-NEXT: global_load_dwordx4 v[11:14], v[1:2], off offset:24 ; MUBUF-NEXT: s_lshr_b32 s4, s32, 6 ; MUBUF-NEXT: s_waitcnt vmcnt(2) -; MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 -; MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:36 -; MUBUF-NEXT: s_waitcnt vmcnt(3) ; MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:12 ; MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:8 ; MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:4 ; MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], s32 -; MUBUF-NEXT: s_waitcnt vmcnt(6) +; MUBUF-NEXT: s_waitcnt vmcnt(5) ; MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:16 +; MUBUF-NEXT: s_waitcnt vmcnt(8) +; MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:36 +; MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:32 +; MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:28 +; 
MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; use s4 ; MUBUF-NEXT: ;;#ASMEND @@ -35,14 +37,14 @@ define void @memcpy_fixed_align(ptr addrspace(5) %dst, ptr addrspace(1) %src) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: global_load_dwordx4 v[3:6], v[1:2], off ; FLATSCR-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 -; FLATSCR-NEXT: global_load_dwordx2 v[11:12], v[1:2], off offset:32 +; FLATSCR-NEXT: global_load_dwordx4 v[11:14], v[1:2], off offset:24 ; FLATSCR-NEXT: s_mov_b32 s0, s32 ; FLATSCR-NEXT: s_waitcnt vmcnt(2) ; FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s32 ; FLATSCR-NEXT: s_waitcnt vmcnt(2) ; FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s32 offset:16 ; FLATSCR-NEXT: s_waitcnt vmcnt(2) -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[11:12], s32 offset:32 +; FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s32 offset:24 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s0 ; FLATSCR-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll b/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll index 0003366f3a3ea..5b7c36559a366 100644 --- a/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll +++ b/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll @@ -12,21 +12,19 @@ define amdgpu_kernel void @memcpy_p0_p0_minsize(ptr %dest, ptr readonly %src) #0 ; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v12, s3 -; CHECK-NEXT: v_mov_b32_e32 v11, s2 -; CHECK-NEXT: flat_load_ubyte v13, v[11:12] offset:46 -; CHECK-NEXT: flat_load_ushort v14, v[11:12] offset:44 -; CHECK-NEXT: flat_load_dwordx3 v[8:10], v[11:12] offset:32 -; CHECK-NEXT: flat_load_dwordx4 v[0:3], v[11:12] offset:16 -; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[11:12] -; CHECK-NEXT: v_mov_b32_e32 v12, s1 -; CHECK-NEXT: v_mov_b32_e32 v11, s0 +; CHECK-NEXT: v_mov_b32_e32 v9, s3 +; CHECK-NEXT: v_mov_b32_e32 
v8, s2 +; CHECK-NEXT: flat_load_dwordx2 v[10:11], v[8:9] offset:32 +; CHECK-NEXT: flat_load_dwordx2 v[12:13], v[8:9] offset:39 +; CHECK-NEXT: flat_load_dwordx4 v[0:3], v[8:9] +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[8:9] offset:16 +; CHECK-NEXT: v_mov_b32_e32 v9, s1 +; CHECK-NEXT: v_mov_b32_e32 v8, s0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: flat_store_byte v[11:12], v13 offset:46 -; CHECK-NEXT: flat_store_short v[11:12], v14 offset:44 -; CHECK-NEXT: flat_store_dwordx3 v[11:12], v[8:10] offset:32 -; CHECK-NEXT: flat_store_dwordx4 v[11:12], v[0:3] offset:16 -; CHECK-NEXT: flat_store_dwordx4 v[11:12], v[4:7] +; CHECK-NEXT: flat_store_dwordx2 v[8:9], v[10:11] offset:32 +; CHECK-NEXT: flat_store_dwordx2 v[8:9], v[12:13] offset:39 +; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7] offset:16 ; CHECK-NEXT: s_endpgm entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 47, i1 false) @@ -173,33 +171,33 @@ define amdgpu_kernel void @memcpy_p0_p5_minsize(ptr %generic, ptr addrspace(5) % ; CHECK-NEXT: v_mov_b32_e32 v26, s0 ; CHECK-NEXT: buffer_load_dword v3, v26, s[20:23], 0 offen offset:124 ; CHECK-NEXT: buffer_load_dword v2, v26, s[20:23], 0 offen offset:120 -; CHECK-NEXT: buffer_load_dword v5, v26, s[20:23], 0 offen offset:100 -; CHECK-NEXT: buffer_load_dword v7, v26, s[20:23], 0 offen offset:108 ; CHECK-NEXT: buffer_load_dword v1, v26, s[20:23], 0 offen offset:116 ; CHECK-NEXT: buffer_load_dword v0, v26, s[20:23], 0 offen offset:112 +; CHECK-NEXT: buffer_load_dword v7, v26, s[20:23], 0 offen offset:108 ; CHECK-NEXT: buffer_load_dword v6, v26, s[20:23], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v5, v26, s[20:23], 0 offen offset:100 ; CHECK-NEXT: buffer_load_dword v4, v26, s[20:23], 0 offen offset:96 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; CHECK-NEXT: buffer_load_dword v8, v26, s[20:23], 0 offen offset:32 -; CHECK-NEXT: buffer_load_dword v9, v26, s[20:23], 0 offen 
offset:36 -; CHECK-NEXT: buffer_load_dword v10, v26, s[20:23], 0 offen offset:40 -; CHECK-NEXT: buffer_load_dword v11, v26, s[20:23], 0 offen offset:44 -; CHECK-NEXT: buffer_load_dword v12, v26, s[20:23], 0 offen offset:48 -; CHECK-NEXT: buffer_load_dword v13, v26, s[20:23], 0 offen offset:52 -; CHECK-NEXT: buffer_load_dword v14, v26, s[20:23], 0 offen offset:56 -; CHECK-NEXT: buffer_load_dword v15, v26, s[20:23], 0 offen offset:60 -; CHECK-NEXT: buffer_load_dword v17, v26, s[20:23], 0 offen offset:68 -; CHECK-NEXT: buffer_load_dword v19, v26, s[20:23], 0 offen offset:76 -; CHECK-NEXT: buffer_load_dword v21, v26, s[20:23], 0 offen offset:84 -; CHECK-NEXT: buffer_load_dword v23, v26, s[20:23], 0 offen offset:92 -; CHECK-NEXT: buffer_load_dword v22, v26, s[20:23], 0 offen offset:88 -; CHECK-NEXT: buffer_load_dword v20, v26, s[20:23], 0 offen offset:80 -; CHECK-NEXT: buffer_load_dword v18, v26, s[20:23], 0 offen offset:72 -; CHECK-NEXT: buffer_load_dword v16, v26, s[20:23], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v11, v26, s[20:23], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v10, v26, s[20:23], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v9, v26, s[20:23], 0 offen offset:84 +; CHECK-NEXT: buffer_load_dword v8, v26, s[20:23], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v15, v26, s[20:23], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v14, v26, s[20:23], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v13, v26, s[20:23], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v12, v26, s[20:23], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v16, v26, s[20:23], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v17, v26, s[20:23], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v18, v26, s[20:23], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v19, v26, s[20:23], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v20, v26, s[20:23], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v21, v26, s[20:23], 0 offen offset:52 +; 
CHECK-NEXT: buffer_load_dword v22, v26, s[20:23], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v23, v26, s[20:23], 0 offen offset:60 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v25, s1 ; CHECK-NEXT: v_mov_b32_e32 v24, s0 -; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: s_waitcnt vmcnt(20) ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[0:3] offset:112 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[4:7] offset:96 @@ -213,10 +211,10 @@ define amdgpu_kernel void @memcpy_p0_p5_minsize(ptr %generic, ptr addrspace(5) % ; CHECK-NEXT: buffer_load_dword v7, v26, s[20:23], 0 offen offset:28 ; CHECK-NEXT: buffer_load_dword v3, v26, s[20:23], 0 offen offset:12 ; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[20:23] offset:80 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[16:19] offset:64 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[12:15] offset:48 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[8:11] offset:32 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[8:11] offset:80 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[12:15] offset:64 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[20:23] offset:48 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[16:19] offset:32 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[4:7] offset:16 ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[0:3] @@ -281,8 +279,8 @@ define amdgpu_kernel void @memcpy_p0_p3_minsize(ptr %generic) #0 { ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[8:11] offset:32 ; CHECK-NEXT: ds_read2_b64 v[0:3], v16 offset0:8 offset1:9 ; CHECK-NEXT: ds_read2_b64 v[4:7], v16 offset0:10 offset1:11 -; CHECK-NEXT: ds_read2_b64 v[8:11], v16 offset0:12 offset1:13 -; CHECK-NEXT: ds_read2_b64 v[16:19], v16 offset0:14 offset1:15 +; CHECK-NEXT: ds_read_b128 v[8:11], v16 offset:96 +; CHECK-NEXT: ds_read_b128 v[16:19], v16 offset:112 ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[12:15] offset:48 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: 
flat_store_dwordx4 v[20:21], v[0:3] offset:64 @@ -302,21 +300,19 @@ define amdgpu_kernel void @memcpy_p0_p0_optsize(ptr %dest, ptr %src) #1 { ; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v12, s3 -; CHECK-NEXT: v_mov_b32_e32 v11, s2 -; CHECK-NEXT: flat_load_ubyte v13, v[11:12] offset:46 -; CHECK-NEXT: flat_load_ushort v14, v[11:12] offset:44 -; CHECK-NEXT: flat_load_dwordx3 v[8:10], v[11:12] offset:32 -; CHECK-NEXT: flat_load_dwordx4 v[0:3], v[11:12] offset:16 -; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[11:12] -; CHECK-NEXT: v_mov_b32_e32 v12, s1 -; CHECK-NEXT: v_mov_b32_e32 v11, s0 +; CHECK-NEXT: v_mov_b32_e32 v9, s3 +; CHECK-NEXT: v_mov_b32_e32 v8, s2 +; CHECK-NEXT: flat_load_dwordx2 v[10:11], v[8:9] offset:32 +; CHECK-NEXT: flat_load_dwordx2 v[12:13], v[8:9] offset:39 +; CHECK-NEXT: flat_load_dwordx4 v[0:3], v[8:9] +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[8:9] offset:16 +; CHECK-NEXT: v_mov_b32_e32 v9, s1 +; CHECK-NEXT: v_mov_b32_e32 v8, s0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: flat_store_byte v[11:12], v13 offset:46 -; CHECK-NEXT: flat_store_short v[11:12], v14 offset:44 -; CHECK-NEXT: flat_store_dwordx3 v[11:12], v[8:10] offset:32 -; CHECK-NEXT: flat_store_dwordx4 v[11:12], v[0:3] offset:16 -; CHECK-NEXT: flat_store_dwordx4 v[11:12], v[4:7] +; CHECK-NEXT: flat_store_dwordx2 v[8:9], v[10:11] offset:32 +; CHECK-NEXT: flat_store_dwordx2 v[8:9], v[12:13] offset:39 +; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7] offset:16 ; CHECK-NEXT: s_endpgm entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 47, i1 false) @@ -463,33 +459,33 @@ define amdgpu_kernel void @memcpy_p0_p5_optsize(ptr %generic, ptr addrspace(5) % ; CHECK-NEXT: v_mov_b32_e32 v26, s0 ; CHECK-NEXT: buffer_load_dword v3, v26, s[20:23], 0 offen offset:124 ; CHECK-NEXT: buffer_load_dword v2, v26, 
s[20:23], 0 offen offset:120 -; CHECK-NEXT: buffer_load_dword v5, v26, s[20:23], 0 offen offset:100 -; CHECK-NEXT: buffer_load_dword v7, v26, s[20:23], 0 offen offset:108 ; CHECK-NEXT: buffer_load_dword v1, v26, s[20:23], 0 offen offset:116 ; CHECK-NEXT: buffer_load_dword v0, v26, s[20:23], 0 offen offset:112 +; CHECK-NEXT: buffer_load_dword v7, v26, s[20:23], 0 offen offset:108 ; CHECK-NEXT: buffer_load_dword v6, v26, s[20:23], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v5, v26, s[20:23], 0 offen offset:100 ; CHECK-NEXT: buffer_load_dword v4, v26, s[20:23], 0 offen offset:96 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; CHECK-NEXT: buffer_load_dword v8, v26, s[20:23], 0 offen offset:32 -; CHECK-NEXT: buffer_load_dword v9, v26, s[20:23], 0 offen offset:36 -; CHECK-NEXT: buffer_load_dword v10, v26, s[20:23], 0 offen offset:40 -; CHECK-NEXT: buffer_load_dword v11, v26, s[20:23], 0 offen offset:44 -; CHECK-NEXT: buffer_load_dword v12, v26, s[20:23], 0 offen offset:48 -; CHECK-NEXT: buffer_load_dword v13, v26, s[20:23], 0 offen offset:52 -; CHECK-NEXT: buffer_load_dword v14, v26, s[20:23], 0 offen offset:56 -; CHECK-NEXT: buffer_load_dword v15, v26, s[20:23], 0 offen offset:60 -; CHECK-NEXT: buffer_load_dword v17, v26, s[20:23], 0 offen offset:68 -; CHECK-NEXT: buffer_load_dword v19, v26, s[20:23], 0 offen offset:76 -; CHECK-NEXT: buffer_load_dword v21, v26, s[20:23], 0 offen offset:84 -; CHECK-NEXT: buffer_load_dword v23, v26, s[20:23], 0 offen offset:92 -; CHECK-NEXT: buffer_load_dword v22, v26, s[20:23], 0 offen offset:88 -; CHECK-NEXT: buffer_load_dword v20, v26, s[20:23], 0 offen offset:80 -; CHECK-NEXT: buffer_load_dword v18, v26, s[20:23], 0 offen offset:72 -; CHECK-NEXT: buffer_load_dword v16, v26, s[20:23], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v11, v26, s[20:23], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v10, v26, s[20:23], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v9, v26, s[20:23], 0 offen offset:84 +; 
CHECK-NEXT: buffer_load_dword v8, v26, s[20:23], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v15, v26, s[20:23], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v14, v26, s[20:23], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v13, v26, s[20:23], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v12, v26, s[20:23], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v16, v26, s[20:23], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v17, v26, s[20:23], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v18, v26, s[20:23], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v19, v26, s[20:23], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v20, v26, s[20:23], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v21, v26, s[20:23], 0 offen offset:52 +; CHECK-NEXT: buffer_load_dword v22, v26, s[20:23], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v23, v26, s[20:23], 0 offen offset:60 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v25, s1 ; CHECK-NEXT: v_mov_b32_e32 v24, s0 -; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: s_waitcnt vmcnt(20) ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[0:3] offset:112 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[4:7] offset:96 @@ -503,10 +499,10 @@ define amdgpu_kernel void @memcpy_p0_p5_optsize(ptr %generic, ptr addrspace(5) % ; CHECK-NEXT: buffer_load_dword v7, v26, s[20:23], 0 offen offset:28 ; CHECK-NEXT: buffer_load_dword v3, v26, s[20:23], 0 offen offset:12 ; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[20:23] offset:80 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[16:19] offset:64 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[12:15] offset:48 -; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[8:11] offset:32 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[8:11] offset:80 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[12:15] offset:64 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[20:23] offset:48 +; CHECK-NEXT: flat_store_dwordx4 v[24:25], 
v[16:19] offset:32 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[4:7] offset:16 ; CHECK-NEXT: flat_store_dwordx4 v[24:25], v[0:3] @@ -571,8 +567,8 @@ define amdgpu_kernel void @memcpy_p0_p3_optsize(ptr %generic) #1 { ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[8:11] offset:32 ; CHECK-NEXT: ds_read2_b64 v[0:3], v16 offset0:8 offset1:9 ; CHECK-NEXT: ds_read2_b64 v[4:7], v16 offset0:10 offset1:11 -; CHECK-NEXT: ds_read2_b64 v[8:11], v16 offset0:12 offset1:13 -; CHECK-NEXT: ds_read2_b64 v[16:19], v16 offset0:14 offset1:15 +; CHECK-NEXT: ds_read_b128 v[8:11], v16 offset:96 +; CHECK-NEXT: ds_read_b128 v[16:19], v16 offset:112 ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[12:15] offset:48 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[0:3] offset:64 diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-param-combinations.ll b/llvm/test/CodeGen/AMDGPU/memcpy-param-combinations.ll index b43ccc551ca95..048610184368d 100644 --- a/llvm/test/CodeGen/AMDGPU/memcpy-param-combinations.ll +++ b/llvm/test/CodeGen/AMDGPU/memcpy-param-combinations.ll @@ -27,19 +27,16 @@ define void @memcpy_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p0_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30 -; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28 -; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16 -; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] -; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) -; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 -; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3) -; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 -; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3) -; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) -; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; CHECK-NEXT: s_clause 0x2 +; 
CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:23 +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] +; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[8:9] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -83,19 +80,16 @@ define void @memcpy_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p0_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30 -; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28 -; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16 -; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] -; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) -; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 -; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3) -; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 -; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3) -; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) -; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:23 +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] +; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[8:9] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -239,19 +233,16 @@ define 
void @memcpy_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p1_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30 -; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28 -; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16 -; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off +; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[8:9] offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -295,19 +286,16 @@ define void @memcpy_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p1_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30 -; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28 -; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16 -; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], 
off +; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[8:9] offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -437,7 +425,7 @@ define void @memcpy_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p3_sz16_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[2:5], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) @@ -451,19 +439,15 @@ define void @memcpy_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p3_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read_b32 v8, v2 offset:24 -; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 -; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 -; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; CHECK-NEXT: ds_read_b64 v[7:8], v2 offset:23 +; CHECK-NEXT: ds_read_b128 v[3:6], v2 +; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:16 +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; 
CHECK-NEXT: flat_store_dwordx2 v[0:1], v[7:8] offset:23 +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[9:10] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -475,8 +459,8 @@ define void @memcpy_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p3_sz32_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 -; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16 +; CHECK-NEXT: ds_read_b128 v[7:10], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) @@ -492,7 +476,7 @@ define void @memcpy_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p3_sz16_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[2:5], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) @@ -506,19 +490,15 @@ define void @memcpy_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p3_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read_b32 v8, v2 offset:24 -; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 -; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 -; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: 
flat_store_dwordx3 v[0:1], v[6:8] offset:16 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; CHECK-NEXT: ds_read_b64 v[7:8], v2 offset:23 +; CHECK-NEXT: ds_read_b128 v[3:6], v2 +; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:16 +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[7:8] offset:23 +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[9:10] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -530,8 +510,8 @@ define void @memcpy_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p3_sz32_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 -; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16 +; CHECK-NEXT: ds_read_b128 v[7:10], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) @@ -643,12 +623,9 @@ define void @memcpy_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p4_sz16_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8 +; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -660,24 +637,16 @@ define void @memcpy_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: 
memcpy_p0_p4_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 -; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24 -; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28 +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[8:9] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28 -; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:8 +; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:23 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -689,18 +658,13 @@ define void @memcpy_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p4_sz32_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] 
offset:8 -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 -; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24 +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -712,12 +676,9 @@ define void @memcpy_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p4_sz16_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8 +; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -729,24 +690,16 @@ define void @memcpy_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p4_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 -; CHECK-NEXT: 
s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 -; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24 -; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28 +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[8:9] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28 -; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:8 +; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:23 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -758,18 +711,13 @@ define void @memcpy_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p4_sz32_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 -; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 -; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24 +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; CHECK-NEXT: 
s_waitcnt vmcnt(0) -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24 +; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -895,22 +843,20 @@ define void @memcpy_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p0_p5_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x8 -; CHECK-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:30 +; CHECK-NEXT: s_clause 0x7 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen -; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 -; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 -; CHECK-NEXT: buffer_load_ushort v11, v2, s[0:3], 0 offen offset:28 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: flat_store_short v[0:1], v11 offset:28 -; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:30 -; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:23 +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[7:8] offset:23 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[9:10] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -964,22 +910,20 @@ define void @memcpy_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, 
ptr addr ; CHECK-LABEL: memcpy_p0_p5_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x8 -; CHECK-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:30 +; CHECK-NEXT: s_clause 0x7 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen -; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 -; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 -; CHECK-NEXT: buffer_load_ushort v11, v2, s[0:3], 0 offen offset:28 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: flat_store_short v[0:1], v11 offset:28 -; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:30 -; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16 -; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:23 +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[7:8] offset:23 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[9:10] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -1161,15 +1105,15 @@ define void @memcpy_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addr ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_clause 0x2 -; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23 -; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16 -; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] +; CHECK-NEXT: 
flat_load_dwordx2 v[8:9], v[2:3] offset:23 +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] +; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[2:3] offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23 +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16 +; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) @@ -1211,15 +1155,15 @@ define void @memcpy_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addr ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_clause 0x2 -; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23 -; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16 -; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] +; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:23 +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] +; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[2:3] offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23 +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16 +; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) 
noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) @@ -1929,18 +1873,18 @@ define void @memcpy_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addr ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_clause 0x7 -; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen -; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 -; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 -; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 -; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:20 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 -; CHECK-NEXT: s_waitcnt vmcnt(4) -; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16 +; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1994,18 +1938,18 @@ define void @memcpy_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addr ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_clause 0x7 -; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen -; CHECK-NEXT: 
buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 -; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 -; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 -; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:20 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 -; CHECK-NEXT: s_waitcnt vmcnt(4) -; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16 +; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3267,19 +3211,16 @@ define void @memcpy_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p0_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30 -; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28 -; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23 +; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] -; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) -; CHECK-NEXT: 
buffer_store_byte v8, v0, s[0:3], 0 offen offset:30 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 -; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 @@ -3334,19 +3275,16 @@ define void @memcpy_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p0_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30 -; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28 -; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23 +; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] -; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) -; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 -; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 
offen offset:20 -; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 @@ -3525,24 +3463,21 @@ define void @memcpy_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p1_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 -; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 -; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 -; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off +; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 +; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; CHECK-NEXT: buffer_store_dword 
v4, v0, s[0:3], 0 offen offset:12 -; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 -; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) @@ -3592,24 +3527,21 @@ define void @memcpy_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p1_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 -; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 -; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 -; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off +; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 +; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt 
vmcnt(0) -; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 -; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 -; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) @@ -3783,25 +3715,20 @@ define void @memcpy_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p3_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read_b32 v8, v1 offset:24 -; CHECK-NEXT: ds_read_u16 v9, v1 offset:28 -; CHECK-NEXT: ds_read_u8 v10, v1 offset:30 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 ; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16 -; CHECK-NEXT: s_waitcnt lgkmcnt(4) -; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: ds_read_b64 v[8:9], v1 offset:23 ; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30 -; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 ; CHECK-NEXT: 
buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) @@ -3850,25 +3777,20 @@ define void @memcpy_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p3_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read_b32 v8, v1 offset:24 -; CHECK-NEXT: ds_read_u16 v9, v1 offset:28 -; CHECK-NEXT: ds_read_u8 v10, v1 offset:30 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 ; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16 -; CHECK-NEXT: s_waitcnt lgkmcnt(4) -; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: ds_read_b64 v[8:9], v1 offset:23 ; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30 -; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) 
noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) @@ -4037,24 +3959,21 @@ define void @memcpy_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p4_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 -; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 -; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 -; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off +; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 +; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 -; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 -; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 
offset:27 +; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) @@ -4104,24 +4023,21 @@ define void @memcpy_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p4_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x3 -; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 -; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 -; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 -; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off -; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off +; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 +; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 -; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 -; CHECK-NEXT: 
buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:27 +; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) @@ -4302,34 +4218,31 @@ define void @memcpy_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p5_sz31_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x8 -; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28 -; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_clause 0x7 +; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:23 +; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:27 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen -; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30 -; CHECK-NEXT: s_waitcnt vmcnt(8) -; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 ; CHECK-NEXT: s_waitcnt vmcnt(7) -; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(6) -; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:27 ; CHECK-NEXT: s_waitcnt vmcnt(5) -; 
CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(4) -; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 ; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) @@ -4398,34 +4311,31 @@ define void @memcpy_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addr ; CHECK-LABEL: memcpy_p5_p5_sz31_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_clause 0x8 -; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28 -; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_clause 0x7 +; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:23 +; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:27 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 ; CHECK-NEXT: buffer_load_dword v8, 
v1, s[0:3], 0 offen -; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30 -; CHECK-NEXT: s_waitcnt vmcnt(8) -; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 ; CHECK-NEXT: s_waitcnt vmcnt(7) -; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:23 ; CHECK-NEXT: s_waitcnt vmcnt(6) -; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:27 ; CHECK-NEXT: s_waitcnt vmcnt(5) -; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 ; CHECK-NEXT: s_waitcnt vmcnt(4) -; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 ; CHECK-NEXT: s_waitcnt vmcnt(3) -; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(2) -; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) diff --git a/llvm/test/CodeGen/AMDGPU/memmove-param-combinations.ll b/llvm/test/CodeGen/AMDGPU/memmove-param-combinations.ll index f08ea27040fb5..01b7f40f6256f 
100644 --- a/llvm/test/CodeGen/AMDGPU/memmove-param-combinations.ll +++ b/llvm/test/CodeGen/AMDGPU/memmove-param-combinations.ll @@ -471,7 +471,7 @@ define void @memmove_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr add ; CHECK-LABEL: memmove_p0_p3_sz16_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[2:5], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) @@ -489,7 +489,7 @@ define void @memmove_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr add ; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 ; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 ; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[2:5], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(3) ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 ; CHECK-NEXT: s_waitcnt lgkmcnt(3) @@ -509,8 +509,8 @@ define void @memmove_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr add ; CHECK-LABEL: memmove_p0_p3_sz32_align_1_1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 -; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16 +; CHECK-NEXT: ds_read_b128 v[7:10], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) @@ -526,7 +526,7 @@ define void @memmove_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr add ; CHECK-LABEL: memmove_p0_p3_sz16_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[2:5], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) @@ 
-544,7 +544,7 @@ define void @memmove_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr add ; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 ; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 ; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 -; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[2:5], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(3) ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 ; CHECK-NEXT: s_waitcnt lgkmcnt(3) @@ -564,8 +564,8 @@ define void @memmove_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr add ; CHECK-LABEL: memmove_p0_p3_sz32_align_2_2: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 -; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 +; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16 +; CHECK-NEXT: ds_read_b128 v[7:10], v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 ; CHECK-NEXT: s_waitcnt lgkmcnt(1) @@ -2077,21 +2077,23 @@ define void @memmove_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr add ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_clause 0x8 -; CHECK-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:30 -; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen -; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 -; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:24 ; CHECK-NEXT: buffer_load_ushort v11, v2, s[0:3], 0 offen offset:28 -; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 -; 
CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: global_store_dword v[0:1], v10, off offset:24 ; CHECK-NEXT: s_waitcnt vmcnt(3) ; CHECK-NEXT: global_store_short v[0:1], v11, off offset:28 -; CHECK-NEXT: global_store_byte v[0:1], v10, off offset:30 -; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off +; CHECK-NEXT: global_store_byte v[0:1], v9, off offset:30 +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: global_store_dwordx3 v[0:1], v[7:9], off offset:16 +; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) @@ -2143,21 +2145,23 @@ define void @memmove_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr add ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_clause 0x8 -; CHECK-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:30 -; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen -; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 -; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 -; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:30 +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:24 ; CHECK-NEXT: 
buffer_load_ushort v11, v2, s[0:3], 0 offen offset:28 -; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 -; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 -; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: global_store_dword v[0:1], v10, off offset:24 ; CHECK-NEXT: s_waitcnt vmcnt(3) ; CHECK-NEXT: global_store_short v[0:1], v11, off offset:28 -; CHECK-NEXT: global_store_byte v[0:1], v10, off offset:30 -; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off +; CHECK-NEXT: global_store_byte v[0:1], v9, off offset:30 +; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: global_store_dwordx3 v[0:1], v[7:9], off offset:16 +; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) diff --git a/llvm/test/Transforms/AggressiveInstCombine/AMDGPU/fold-consecutive-loads.ll b/llvm/test/Transforms/AggressiveInstCombine/AMDGPU/fold-consecutive-loads.ll new file mode 100644 index 0000000000000..05d2330fffc7f --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/AMDGPU/fold-consecutive-loads.ll @@ -0,0 +1,234 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=sroa,instcombine,aggressive-instcombine %s -S -o - | FileCheck %s + +define i64 @quux(ptr %arg) { +; CHECK-LABEL: define i64 @quux( +; CHECK-SAME: ptr [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[ARG]], align 1 +; 
CHECK-NEXT: ret i64 [[LOAD]] +; +bb: + %load = load i8, ptr %arg, align 1 + %getelementptr = getelementptr inbounds nuw i8, ptr %arg, i64 1 + %load1 = load i8, ptr %getelementptr, align 1 + %getelementptr2 = getelementptr inbounds nuw i8, ptr %arg, i64 2 + %load3 = load i8, ptr %getelementptr2, align 1 + %getelementptr4 = getelementptr inbounds nuw i8, ptr %arg, i64 3 + %load5 = load i8, ptr %getelementptr4, align 1 + %getelementptr6 = getelementptr inbounds nuw i8, ptr %arg, i64 4 + %load7 = load i8, ptr %getelementptr6, align 1 + %getelementptr8 = getelementptr inbounds nuw i8, ptr %arg, i64 5 + %load9 = load i8, ptr %getelementptr8, align 1 + %getelementptr10 = getelementptr inbounds nuw i8, ptr %arg, i64 6 + %load11 = load i8, ptr %getelementptr10, align 1 + %getelementptr12 = getelementptr inbounds nuw i8, ptr %arg, i64 7 + %load13 = load i8, ptr %getelementptr12, align 1 + %zext = zext i8 %load13 to i64 + %shl = shl nuw i64 %zext, 56 + %zext14 = zext i8 %load11 to i64 + %shl15 = shl nuw nsw i64 %zext14, 48 + %or = or disjoint i64 %shl, %shl15 + %zext16 = zext i8 %load9 to i64 + %shl17 = shl nuw nsw i64 %zext16, 40 + %or18 = or disjoint i64 %or, %shl17 + %zext19 = zext i8 %load7 to i64 + %shl20 = shl nuw nsw i64 %zext19, 32 + %or21 = or disjoint i64 %or18, %shl20 + %zext22 = zext i8 %load5 to i64 + %shl23 = shl nuw nsw i64 %zext22, 24 + %or24 = or disjoint i64 %or21, %shl23 + %zext25 = zext i8 %load3 to i64 + %shl26 = shl nuw nsw i64 %zext25, 16 + %zext27 = zext i8 %load1 to i64 + %shl28 = shl nuw nsw i64 %zext27, 8 + %or29 = or disjoint i64 %or24, %shl26 + %zext30 = zext i8 %load to i64 + %or31 = or i64 %or29, %shl28 + %or32 = or i64 %or31, %zext30 + ret i64 %or32 +} + + +; The following test case reduced from a client kernel +define fastcc <16 x float> @hoge(ptr %arg) { +; CHECK-LABEL: define fastcc <16 x float> @hoge( +; CHECK-SAME: ptr [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG]], align 8 +; 
CHECK-NEXT: [[LOAD28:%.*]] = load i64, ptr [[LOAD]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR72:%.*]] = getelementptr i8, ptr [[LOAD]], i64 8 +; CHECK-NEXT: [[LOAD73:%.*]] = load i64, ptr [[GETELEMENTPTR72]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR120:%.*]] = getelementptr i8, ptr [[LOAD]], i64 16 +; CHECK-NEXT: [[LOAD121:%.*]] = load i64, ptr [[GETELEMENTPTR120]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR168:%.*]] = getelementptr i8, ptr [[LOAD]], i64 24 +; CHECK-NEXT: [[LOAD169:%.*]] = load i64, ptr [[GETELEMENTPTR168]], align 1 +; CHECK-NEXT: [[CALL:%.*]] = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD28]], i64 0, <16 x float> zeroinitializer, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[CALL225:%.*]] = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD73]], i64 0, <16 x float> [[CALL]], i32 0, i32 0, i32 0) +; CHECK-NEXT: [[CALL230:%.*]] = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD121]], i64 0, <16 x float> [[CALL225]], i32 0, i32 0, i32 0) +; CHECK-NEXT: [[CALL235:%.*]] = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD169]], i64 0, <16 x float> [[CALL230]], i32 0, i32 0, i32 0) +; CHECK-NEXT: ret <16 x float> [[CALL235]] +; +bb: + %load = load ptr, ptr %arg, align 8 + %load28 = load i8, ptr %load, align 1 + %getelementptr30 = getelementptr i8, ptr %load, i64 1 + %load31 = load i8, ptr %getelementptr30, align 1 + %getelementptr36 = getelementptr i8, ptr %load, i64 2 + %load37 = load i8, ptr %getelementptr36, align 1 + %getelementptr42 = getelementptr i8, ptr %load, i64 3 + %load43 = load i8, ptr %getelementptr42, align 1 + %getelementptr48 = getelementptr i8, ptr %load, i64 4 + %load49 = load i8, ptr %getelementptr48, align 1 + %getelementptr54 = getelementptr i8, ptr %load, i64 5 + %load55 = load i8, ptr %getelementptr54, align 1 + %getelementptr60 = getelementptr i8, ptr %load, i64 6 + %load61 = load i8, ptr %getelementptr60, align 1 + %getelementptr66 = getelementptr i8, ptr %load, i64 7 
+ %load67 = load i8, ptr %getelementptr66, align 1 + %getelementptr72 = getelementptr i8, ptr %load, i64 8 + %load73 = load i8, ptr %getelementptr72, align 1 + %getelementptr78 = getelementptr i8, ptr %load, i64 9 + %load79 = load i8, ptr %getelementptr78, align 1 + %getelementptr84 = getelementptr i8, ptr %load, i64 10 + %load85 = load i8, ptr %getelementptr84, align 1 + %getelementptr90 = getelementptr i8, ptr %load, i64 11 + %load91 = load i8, ptr %getelementptr90, align 1 + %getelementptr96 = getelementptr i8, ptr %load, i64 12 + %load97 = load i8, ptr %getelementptr96, align 1 + %getelementptr102 = getelementptr i8, ptr %load, i64 13 + %load103 = load i8, ptr %getelementptr102, align 1 + %getelementptr108 = getelementptr i8, ptr %load, i64 14 + %load109 = load i8, ptr %getelementptr108, align 1 + %getelementptr114 = getelementptr i8, ptr %load, i64 15 + %load115 = load i8, ptr %getelementptr114, align 1 + %getelementptr120 = getelementptr i8, ptr %load, i64 16 + %load121 = load i8, ptr %getelementptr120, align 1 + %getelementptr126 = getelementptr i8, ptr %load, i64 17 + %load127 = load i8, ptr %getelementptr126, align 1 + %getelementptr132 = getelementptr i8, ptr %load, i64 18 + %load133 = load i8, ptr %getelementptr132, align 1 + %getelementptr138 = getelementptr i8, ptr %load, i64 19 + %load139 = load i8, ptr %getelementptr138, align 1 + %getelementptr144 = getelementptr i8, ptr %load, i64 20 + %load145 = load i8, ptr %getelementptr144, align 1 + %getelementptr150 = getelementptr i8, ptr %load, i64 21 + %load151 = load i8, ptr %getelementptr150, align 1 + %getelementptr156 = getelementptr i8, ptr %load, i64 22 + %load157 = load i8, ptr %getelementptr156, align 1 + %getelementptr162 = getelementptr i8, ptr %load, i64 23 + %load163 = load i8, ptr %getelementptr162, align 1 + %getelementptr168 = getelementptr i8, ptr %load, i64 24 + %load169 = load i8, ptr %getelementptr168, align 1 + %getelementptr174 = getelementptr i8, ptr %load, i64 25 + %load175 = load 
i8, ptr %getelementptr174, align 1 + %getelementptr180 = getelementptr i8, ptr %load, i64 26 + %load181 = load i8, ptr %getelementptr180, align 1 + %getelementptr186 = getelementptr i8, ptr %load, i64 27 + %load187 = load i8, ptr %getelementptr186, align 1 + %getelementptr192 = getelementptr i8, ptr %load, i64 28 + %load193 = load i8, ptr %getelementptr192, align 1 + %getelementptr198 = getelementptr i8, ptr %load, i64 29 + %load199 = load i8, ptr %getelementptr198, align 1 + %getelementptr204 = getelementptr i8, ptr %load, i64 30 + %load205 = load i8, ptr %getelementptr204, align 1 + %getelementptr210 = getelementptr i8, ptr %load, i64 31 + %load211 = load i8, ptr %getelementptr210, align 1 + %alloca1.sroa.8.0.insert.ext = zext i8 %load67 to i64 + %alloca1.sroa.8.0.insert.shift = shl i64 %alloca1.sroa.8.0.insert.ext, 56 + %alloca1.sroa.7.0.insert.ext = zext i8 %load61 to i64 + %alloca1.sroa.7.0.insert.shift = shl i64 %alloca1.sroa.7.0.insert.ext, 48 + %alloca1.sroa.7.0.insert.insert = or i64 %alloca1.sroa.8.0.insert.shift, %alloca1.sroa.7.0.insert.shift + %alloca1.sroa.6.0.insert.ext = zext i8 %load55 to i64 + %alloca1.sroa.6.0.insert.shift = shl i64 %alloca1.sroa.6.0.insert.ext, 40 + %alloca1.sroa.6.0.insert.insert = or i64 %alloca1.sroa.7.0.insert.insert, %alloca1.sroa.6.0.insert.shift + %alloca1.sroa.5.0.insert.ext = zext i8 %load49 to i64 + %alloca1.sroa.5.0.insert.shift = shl i64 %alloca1.sroa.5.0.insert.ext, 32 + %alloca1.sroa.5.0.insert.insert = or i64 %alloca1.sroa.6.0.insert.insert, %alloca1.sroa.5.0.insert.shift + %alloca1.sroa.4.0.insert.ext = zext i8 %load43 to i64 + %alloca1.sroa.4.0.insert.shift = shl i64 %alloca1.sroa.4.0.insert.ext, 24 + %alloca1.sroa.4.0.insert.insert = or i64 %alloca1.sroa.5.0.insert.insert, %alloca1.sroa.4.0.insert.shift + %alloca1.sroa.3.0.insert.ext = zext i8 %load37 to i64 + %alloca1.sroa.3.0.insert.shift = shl i64 %alloca1.sroa.3.0.insert.ext, 16 + %alloca1.sroa.2.0.insert.ext = zext i8 %load31 to i64 + 
%alloca1.sroa.2.0.insert.shift = shl i64 %alloca1.sroa.2.0.insert.ext, 8 + %alloca1.sroa.2.0.insert.mask = or i64 %alloca1.sroa.4.0.insert.insert, %alloca1.sroa.3.0.insert.shift + %alloca1.sroa.0.0.insert.ext = zext i8 %load28 to i64 + %alloca1.sroa.0.0.insert.mask = or i64 %alloca1.sroa.2.0.insert.mask, %alloca1.sroa.2.0.insert.shift + %alloca1.sroa.0.0.insert.insert = or i64 %alloca1.sroa.0.0.insert.mask, %alloca1.sroa.0.0.insert.ext + %call = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %alloca1.sroa.0.0.insert.insert, i64 0, <16 x float> zeroinitializer, i32 0, i32 0, i32 0) + %alloca1.sroa.17.8.insert.ext = zext i8 %load115 to i64 + %alloca1.sroa.17.8.insert.shift = shl i64 %alloca1.sroa.17.8.insert.ext, 56 + %alloca1.sroa.16.8.insert.ext = zext i8 %load109 to i64 + %alloca1.sroa.16.8.insert.shift = shl i64 %alloca1.sroa.16.8.insert.ext, 48 + %alloca1.sroa.16.8.insert.insert = or i64 %alloca1.sroa.17.8.insert.shift, %alloca1.sroa.16.8.insert.shift + %alloca1.sroa.15.8.insert.ext = zext i8 %load103 to i64 + %alloca1.sroa.15.8.insert.shift = shl i64 %alloca1.sroa.15.8.insert.ext, 40 + %alloca1.sroa.15.8.insert.insert = or i64 %alloca1.sroa.16.8.insert.insert, %alloca1.sroa.15.8.insert.shift + %alloca1.sroa.14.8.insert.ext = zext i8 %load97 to i64 + %alloca1.sroa.14.8.insert.shift = shl i64 %alloca1.sroa.14.8.insert.ext, 32 + %alloca1.sroa.14.8.insert.insert = or i64 %alloca1.sroa.15.8.insert.insert, %alloca1.sroa.14.8.insert.shift + %alloca1.sroa.13.8.insert.ext = zext i8 %load91 to i64 + %alloca1.sroa.13.8.insert.shift = shl i64 %alloca1.sroa.13.8.insert.ext, 24 + %alloca1.sroa.13.8.insert.insert = or i64 %alloca1.sroa.14.8.insert.insert, %alloca1.sroa.13.8.insert.shift + %alloca1.sroa.12.8.insert.ext = zext i8 %load85 to i64 + %alloca1.sroa.12.8.insert.shift = shl i64 %alloca1.sroa.12.8.insert.ext, 16 + %alloca1.sroa.11.8.insert.ext = zext i8 %load79 to i64 + %alloca1.sroa.11.8.insert.shift = shl i64 %alloca1.sroa.11.8.insert.ext, 8 + 
%alloca1.sroa.11.8.insert.mask = or i64 %alloca1.sroa.13.8.insert.insert, %alloca1.sroa.12.8.insert.shift + %alloca1.sroa.9.8.insert.ext = zext i8 %load73 to i64 + %alloca1.sroa.9.8.insert.mask = or i64 %alloca1.sroa.11.8.insert.mask, %alloca1.sroa.11.8.insert.shift + %alloca1.sroa.9.8.insert.insert = or i64 %alloca1.sroa.9.8.insert.mask, %alloca1.sroa.9.8.insert.ext + %call225 = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %alloca1.sroa.9.8.insert.insert, i64 0, <16 x float> %call, i32 0, i32 0, i32 0) + %alloca1.sroa.26.16.insert.ext = zext i8 %load163 to i64 + %alloca1.sroa.26.16.insert.shift = shl i64 %alloca1.sroa.26.16.insert.ext, 56 + %alloca1.sroa.25.16.insert.ext = zext i8 %load157 to i64 + %alloca1.sroa.25.16.insert.shift = shl i64 %alloca1.sroa.25.16.insert.ext, 48 + %alloca1.sroa.25.16.insert.insert = or i64 %alloca1.sroa.26.16.insert.shift, %alloca1.sroa.25.16.insert.shift + %alloca1.sroa.24.16.insert.ext = zext i8 %load151 to i64 + %alloca1.sroa.24.16.insert.shift = shl i64 %alloca1.sroa.24.16.insert.ext, 40 + %alloca1.sroa.24.16.insert.insert = or i64 %alloca1.sroa.25.16.insert.insert, %alloca1.sroa.24.16.insert.shift + %alloca1.sroa.23.16.insert.ext = zext i8 %load145 to i64 + %alloca1.sroa.23.16.insert.shift = shl i64 %alloca1.sroa.23.16.insert.ext, 32 + %alloca1.sroa.23.16.insert.insert = or i64 %alloca1.sroa.24.16.insert.insert, %alloca1.sroa.23.16.insert.shift + %alloca1.sroa.22.16.insert.ext = zext i8 %load139 to i64 + %alloca1.sroa.22.16.insert.shift = shl i64 %alloca1.sroa.22.16.insert.ext, 24 + %alloca1.sroa.22.16.insert.insert = or i64 %alloca1.sroa.23.16.insert.insert, %alloca1.sroa.22.16.insert.shift + %alloca1.sroa.21.16.insert.ext = zext i8 %load133 to i64 + %alloca1.sroa.21.16.insert.shift = shl i64 %alloca1.sroa.21.16.insert.ext, 16 + %alloca1.sroa.20.16.insert.ext = zext i8 %load127 to i64 + %alloca1.sroa.20.16.insert.shift = shl i64 %alloca1.sroa.20.16.insert.ext, 8 + %alloca1.sroa.20.16.insert.mask = or i64 
%alloca1.sroa.22.16.insert.insert, %alloca1.sroa.21.16.insert.shift + %alloca1.sroa.18.16.insert.ext = zext i8 %load121 to i64 + %alloca1.sroa.18.16.insert.mask = or i64 %alloca1.sroa.20.16.insert.mask, %alloca1.sroa.20.16.insert.shift + %alloca1.sroa.18.16.insert.insert = or i64 %alloca1.sroa.18.16.insert.mask, %alloca1.sroa.18.16.insert.ext + %call230 = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %alloca1.sroa.18.16.insert.insert, i64 0, <16 x float> %call225, i32 0, i32 0, i32 0) + %alloca1.sroa.35.24.insert.ext = zext i8 %load211 to i64 + %alloca1.sroa.35.24.insert.shift = shl i64 %alloca1.sroa.35.24.insert.ext, 56 + %alloca1.sroa.34.24.insert.ext = zext i8 %load205 to i64 + %alloca1.sroa.34.24.insert.shift = shl i64 %alloca1.sroa.34.24.insert.ext, 48 + %alloca1.sroa.34.24.insert.insert = or i64 %alloca1.sroa.35.24.insert.shift, %alloca1.sroa.34.24.insert.shift + %alloca1.sroa.33.24.insert.ext = zext i8 %load199 to i64 + %alloca1.sroa.33.24.insert.shift = shl i64 %alloca1.sroa.33.24.insert.ext, 40 + %alloca1.sroa.33.24.insert.insert = or i64 %alloca1.sroa.34.24.insert.insert, %alloca1.sroa.33.24.insert.shift + %alloca1.sroa.32.24.insert.ext = zext i8 %load193 to i64 + %alloca1.sroa.32.24.insert.shift = shl i64 %alloca1.sroa.32.24.insert.ext, 32 + %alloca1.sroa.32.24.insert.insert = or i64 %alloca1.sroa.33.24.insert.insert, %alloca1.sroa.32.24.insert.shift + %alloca1.sroa.31.24.insert.ext = zext i8 %load187 to i64 + %alloca1.sroa.31.24.insert.shift = shl i64 %alloca1.sroa.31.24.insert.ext, 24 + %alloca1.sroa.31.24.insert.insert = or i64 %alloca1.sroa.32.24.insert.insert, %alloca1.sroa.31.24.insert.shift + %alloca1.sroa.30.24.insert.ext = zext i8 %load181 to i64 + %alloca1.sroa.30.24.insert.shift = shl i64 %alloca1.sroa.30.24.insert.ext, 16 + %alloca1.sroa.29.24.insert.ext = zext i8 %load175 to i64 + %alloca1.sroa.29.24.insert.shift = shl i64 %alloca1.sroa.29.24.insert.ext, 8 + %alloca1.sroa.29.24.insert.mask = or i64 
%alloca1.sroa.31.24.insert.insert, %alloca1.sroa.30.24.insert.shift + %alloca1.sroa.27.24.insert.ext = zext i8 %load169 to i64 + %alloca1.sroa.27.24.insert.mask = or i64 %alloca1.sroa.29.24.insert.mask, %alloca1.sroa.29.24.insert.shift + %alloca1.sroa.27.24.insert.insert = or i64 %alloca1.sroa.27.24.insert.mask, %alloca1.sroa.27.24.insert.ext + %call235 = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %alloca1.sroa.27.24.insert.insert, i64 0, <16 x float> %call230, i32 0, i32 0, i32 0) + ret <16 x float> %call235 +} + +declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64, i64, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #0 + +attributes #0 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Transforms/AggressiveInstCombine/AMDGPU/lit.local.cfg b/llvm/test/Transforms/AggressiveInstCombine/AMDGPU/lit.local.cfg new file mode 100644 index 0000000000000..7c492428aec76 --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not "AMDGPU" in config.root.targets: + config.unsupported = True From 994a6a39e13dcc335247a127a5da05905d1ac541 Mon Sep 17 00:00:00 2001 From: Uyiosa Iyekekpolor <96444432+uyoyo0@users.noreply.github.com> Date: Mon, 15 Sep 2025 06:08:16 -0400 Subject: [PATCH 318/734] [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) The scalarizeExtExtract transform assumed little-endian lane ordering, causing miscompiles on big-endian targets such as AIX/PowerPC under -O3 -flto. This patch updates the shift calculation to handle endianness correctly for big-endian targets. No functional change for little-endian targets. Fixes #158197. 
--------- Co-authored-by: Simon Pilgrim --- .../Transforms/Vectorize/VectorCombine.cpp | 11 ++++-- .../AArch64/scalarize-ext-extract-endian.ll | 36 +++++++++++++++++++ .../VectorCombine/PowerPC/lit.local.cfg | 2 ++ .../PowerPC/scalarize-ext-extract.ll | 22 ++++++++++++ 4 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll create mode 100644 llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg create mode 100644 llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index a84b6f59971c9..0ef933f596604 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2014,12 +2014,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) { IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy))); uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType()); uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1; + uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy); + Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits); + Value *Mask = ConstantInt::get(PackedTy, EltBitMask); for (User *U : Ext->users()) { auto *Extract = cast(U); uint64_t Idx = cast(Extract->getIndexOperand())->getZExtValue(); - Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits); - Value *And = Builder.CreateAnd(LShr, EltBitMask); + uint64_t ShiftAmt = + DL->isBigEndian() + ? 
(TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits) + : (Idx * SrcEltSizeInBits); + Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt); + Value *And = Builder.CreateAnd(LShr, Mask); U->replaceAllUsesWith(And); } return true; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll new file mode 100644 index 0000000000000..9796faf2e6feb --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE + +define i64 @g(<8 x i8> %v) { +; LE-LABEL: @g( +; LE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; LE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; LE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 56 +; LE-NEXT: [[TMP4:%.*]] = and i64 [[TMP2]], 255 +; LE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; LE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; LE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; LE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; LE-NEXT: ret i64 [[SUM]] +; +; BE-LABEL: @g( +; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 +; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; BE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; BE-NEXT: ret i64 [[SUM]] +; + %z = zext <8 x i8> %v to <8 x i64> + %e0 = extractelement <8 x i64> %z, i32 0 + %e7 = extractelement 
<8 x i64> %z, i32 7 + %sum = add i64 %e0, %e7 + ret i64 %sum +} + + + diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg new file mode 100644 index 0000000000000..15af315f104fc --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if 'PowerPC' not in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll new file mode 100644 index 0000000000000..a9b719920c341 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='vector-combine' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE + +define i64 @g(<8 x i8> %v) { +; BE-LABEL: @g( +; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 +; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; BE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; BE-NEXT: ret i64 [[SUM]] +; + %z = zext <8 x i8> %v to <8 x i64> + %e0 = extractelement <8 x i64> %z, i32 0 + %e7 = extractelement <8 x i64> %z, i32 7 + %sum = add i64 %e0, %e7 + ret i64 %sum +} + From e1d60f7f3c2773f21d13ae50d08a605529c102da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Mon, 15 Sep 2025 11:15:01 +0100 Subject: [PATCH 319/734] [mlir][vector][nfc] Group all tests for `foldFromElementsToConstant` (#158578) This patch merely moves + renames tests for Vector's `foldFromElementsToConstant` - to better align with our testing guides: * 
https://mlir.llvm.org/getting_started/TestingGuide/ Changes: 1. Make sure that all tests for `foldFromElementsToConstant` are grouped together. 2. Use `@negative_` as a prefix for negative tests (*). 3. Use captigal letters for LIT variable names (*). --- mlir/test/Dialect/Vector/canonicalize.mlir | 50 ++++++++++++++-------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index e7381e0c8997e..05c88b8abfbb0 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -3326,8 +3326,12 @@ func.func @from_elements_to_elements_shuffle(%a: vector<4x2xf32>) -> vector<4x2x // ----- -// CHECK-LABEL: func @from_elements_all_elements_constant( -func.func @from_elements_all_elements_constant() -> vector<2x2xi32> { +// +--------------------------------------------------------------------------- +// Tests for foldFromElementsToConstant +// +--------------------------------------------------------------------------- + +// CHECK-LABEL: func @from_elements_to_constant( +func.func @from_elements_to_constant() -> vector<2x2xi32> { %c0_i32 = arith.constant 0 : i32 %c1_i32 = arith.constant 1 : i32 %c2_i32 = arith.constant 2 : i32 @@ -3340,9 +3344,11 @@ func.func @from_elements_all_elements_constant() -> vector<2x2xi32> { // ----- -// CHECK-LABEL: func @from_elements_partial_elements_constant( +// One of the elements is not a constant, the folder should fail. 
+ +// CHECK-LABEL: func @negative_from_elements_to_constant( // CHECK-SAME: %[[A:.*]]: f32 -func.func @from_elements_partial_elements_constant(%arg0: f32) -> vector<2xf32> { +func.func @negative_from_elements_to_constant(%arg0: f32) -> vector<2xf32> { // CHECK: %[[C:.*]] = arith.constant 1.000000e+00 : f32 %c = arith.constant 1.0 : f32 // CHECK: %[[RES:.*]] = vector.from_elements %[[A]], %[[C]] : vector<2xf32> @@ -3353,6 +3359,28 @@ func.func @from_elements_partial_elements_constant(%arg0: f32) -> vector<2xf32> // ----- +// While all inputs in this example are constant, we cannot create a +// DenselElemAttr containing llvm.mlir.addressof. Instead, +// `foldFromElementsToConstant` bails out. Note that in this case, a different +// folder is applied (`rewriteFromElementsAsBroadcast`). +llvm.mlir.global constant @my_symbol() : i32 + +// CHECK-LABEL: func @negative_from_elements_to_constant +// CHECK: %[[A:.*]] = llvm.mlir.addressof @my_symbol +// CHECK: %[[B:.*]] = vector.broadcast %[[A]] : !llvm.ptr to vector<1x!llvm.ptr> +// CHECK: return %[[B]] +func.func @negative_from_elements_to_constant() -> vector<1x!llvm.ptr> { + %a = llvm.mlir.addressof @my_symbol : !llvm.ptr + %b = vector.from_elements %a : vector<1x!llvm.ptr> + return %b : vector<1x!llvm.ptr> +} + +// +--------------------------------------------------------------------------- +// End of Tests for foldFromElementsToConstant +// +--------------------------------------------------------------------------- + +// ----- + // CHECK-LABEL: func @vector_insert_const_regression( // CHECK: llvm.mlir.undef // CHECK: vector.insert @@ -3726,17 +3754,3 @@ func.func @no_fold_insert_use_chain_mismatch_static_position(%arg : vector<4xf32 %v_1 = vector.insert %val, %v_0[1] : f32 into vector<4xf32> return %v_1 : vector<4xf32> } - -// ----- - -llvm.mlir.global constant @my_symbol() : i32 - -// CHECK-LABEL: func @from_address_of_regression -// CHECK: %[[a:.*]] = llvm.mlir.addressof @my_symbol -// CHECK: %[[b:.*]] = 
vector.broadcast %[[a]] : !llvm.ptr to vector<1x!llvm.ptr> -// CHECK: return %[[b]] -func.func @from_address_of_regression() -> vector<1x!llvm.ptr> { - %a = llvm.mlir.addressof @my_symbol : !llvm.ptr - %b = vector.from_elements %a : vector<1x!llvm.ptr> - return %b : vector<1x!llvm.ptr> -} From cdedc81c33649e97f053ca9eb346e3db6664bd7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Horv=C3=A1th?= Date: Mon, 15 Sep 2025 12:30:29 +0200 Subject: [PATCH 320/734] [APINotes] Support annotating safety of APIs (#157506) --- clang/docs/APINotes.rst | 14 ++++++++ clang/include/clang/APINotes/Types.h | 28 +++++++++++++-- clang/lib/APINotes/APINotesFormat.h | 2 +- clang/lib/APINotes/APINotesReader.cpp | 13 ++++--- clang/lib/APINotes/APINotesTypes.cpp | 15 ++++++++ clang/lib/APINotes/APINotesWriter.cpp | 6 ++++ clang/lib/APINotes/APINotesYAMLCompiler.cpp | 34 +++++++++++++++++++ clang/lib/Sema/SemaAPINotes.cpp | 24 +++++++++++++ .../Inputs/Headers/SwiftImportAs.apinotes | 21 ++++++++++++ .../APINotes/Inputs/Headers/SwiftImportAs.h | 11 ++++++ clang/test/APINotes/swift-import-as.cpp | 30 ++++++++++++++++ 11 files changed, 190 insertions(+), 8 deletions(-) diff --git a/clang/docs/APINotes.rst b/clang/docs/APINotes.rst index dec4b186ff72f..e142cfa62e5a2 100644 --- a/clang/docs/APINotes.rst +++ b/clang/docs/APINotes.rst @@ -229,6 +229,20 @@ declaration kind), all of which are optional: - Name: vector SwiftConformsTo: Cxx.CxxSequence +:SwiftSafety: + + Import a declaration as ``@safe`` or ``@unsafe`` to Swift. + + :: + + Tags: + - Name: UnsafeType + SwiftSafety: unsafe + - Name: span + Methods: + - Name: size + SwiftSafety: safe + :Availability, AvailabilityMsg: A value of "nonswift" is equivalent to ``NS_SWIFT_UNAVAILABLE``. 
A value of diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index 71625715bda19..fb2b91a3e1750 100644 --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -46,6 +46,8 @@ enum class SwiftNewTypeKind { Enum, }; +enum class SwiftSafetyKind { Unspecified, Safe, Unsafe, None }; + /// Describes API notes data for any entity. /// /// This is used as the base of all API notes. @@ -71,13 +73,19 @@ class CommonEntityInfo { LLVM_PREFERRED_TYPE(bool) unsigned SwiftPrivate : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned SwiftSafetyAudited : 1; + + LLVM_PREFERRED_TYPE(SwiftSafetyKind) + unsigned SwiftSafety : 2; + public: /// Swift name of this entity. std::string SwiftName; CommonEntityInfo() : Unavailable(0), UnavailableInSwift(0), SwiftPrivateSpecified(0), - SwiftPrivate(0) {} + SwiftPrivate(0), SwiftSafetyAudited(0), SwiftSafety(0) {} std::optional isSwiftPrivate() const { return SwiftPrivateSpecified ? std::optional(SwiftPrivate) @@ -89,6 +97,17 @@ class CommonEntityInfo { SwiftPrivate = Private.value_or(0); } + std::optional getSwiftSafety() const { + return SwiftSafetyAudited ? 
std::optional( + static_cast(SwiftSafety)) + : std::nullopt; + } + + void setSwiftSafety(SwiftSafetyKind Safety) { + SwiftSafetyAudited = 1; + SwiftSafety = static_cast(Safety); + } + friend bool operator==(const CommonEntityInfo &, const CommonEntityInfo &); CommonEntityInfo &operator|=(const CommonEntityInfo &RHS) { @@ -108,6 +127,9 @@ class CommonEntityInfo { if (!SwiftPrivateSpecified) setSwiftPrivate(RHS.isSwiftPrivate()); + if (!SwiftSafetyAudited && RHS.SwiftSafetyAudited) + setSwiftSafety(*RHS.getSwiftSafety()); + if (SwiftName.empty()) SwiftName = RHS.SwiftName; @@ -123,7 +145,9 @@ inline bool operator==(const CommonEntityInfo &LHS, LHS.Unavailable == RHS.Unavailable && LHS.UnavailableInSwift == RHS.UnavailableInSwift && LHS.SwiftPrivateSpecified == RHS.SwiftPrivateSpecified && - LHS.SwiftPrivate == RHS.SwiftPrivate && LHS.SwiftName == RHS.SwiftName; + LHS.SwiftPrivate == RHS.SwiftPrivate && + LHS.SwiftSafetyAudited == RHS.SwiftSafetyAudited && + LHS.SwiftSafety == RHS.SwiftSafety && LHS.SwiftName == RHS.SwiftName; } inline bool operator!=(const CommonEntityInfo &LHS, diff --git a/clang/lib/APINotes/APINotesFormat.h b/clang/lib/APINotes/APINotesFormat.h index 69d180e7b3eb5..bb423ccb2bfaf 100644 --- a/clang/lib/APINotes/APINotesFormat.h +++ b/clang/lib/APINotes/APINotesFormat.h @@ -24,7 +24,7 @@ const uint16_t VERSION_MAJOR = 0; /// API notes file minor version number. /// /// When the format changes IN ANY WAY, this number should be incremented. -const uint16_t VERSION_MINOR = 37; // SwiftDestroyOp +const uint16_t VERSION_MINOR = 38; // SwiftSafety const uint8_t kSwiftConforms = 1; const uint8_t kSwiftDoesNotConform = 2; diff --git a/clang/lib/APINotes/APINotesReader.cpp b/clang/lib/APINotes/APINotesReader.cpp index 573356f97ff73..7f9bb5f12cda7 100644 --- a/clang/lib/APINotes/APINotesReader.cpp +++ b/clang/lib/APINotes/APINotesReader.cpp @@ -94,11 +94,14 @@ class VersionedTableInfo { /// Read serialized CommonEntityInfo. 
void ReadCommonEntityInfo(const uint8_t *&Data, CommonEntityInfo &Info) { - uint8_t UnavailableBits = *Data++; - Info.Unavailable = (UnavailableBits >> 1) & 0x01; - Info.UnavailableInSwift = UnavailableBits & 0x01; - if ((UnavailableBits >> 2) & 0x01) - Info.setSwiftPrivate(static_cast((UnavailableBits >> 3) & 0x01)); + uint8_t EncodedBits = *Data++; + Info.Unavailable = (EncodedBits >> 1) & 0x01; + Info.UnavailableInSwift = EncodedBits & 0x01; + if ((EncodedBits >> 2) & 0x01) + Info.setSwiftPrivate(static_cast((EncodedBits >> 3) & 0x01)); + if ((EncodedBits >> 4) & 0x01) + Info.setSwiftSafety( + static_cast((EncodedBits >> 5) & 0x03)); unsigned MsgLength = endian::readNext(Data); diff --git a/clang/lib/APINotes/APINotesTypes.cpp b/clang/lib/APINotes/APINotesTypes.cpp index f726faa832bcc..bff4be104c6c8 100644 --- a/clang/lib/APINotes/APINotesTypes.cpp +++ b/clang/lib/APINotes/APINotesTypes.cpp @@ -18,6 +18,21 @@ LLVM_DUMP_METHOD void CommonEntityInfo::dump(llvm::raw_ostream &OS) const { OS << "[UnavailableInSwift] "; if (SwiftPrivateSpecified) OS << (SwiftPrivate ? 
"[SwiftPrivate] " : ""); + if (SwiftSafetyAudited) { + switch (*getSwiftSafety()) { + case SwiftSafetyKind::Safe: + OS << "[Safe] "; + break; + case SwiftSafetyKind::Unsafe: + OS << "[Unsafe] "; + break; + case SwiftSafetyKind::Unspecified: + OS << "[Unspecified] "; + break; + case SwiftSafetyKind::None: + break; + } + } if (!SwiftName.empty()) OS << "Swift Name: " << SwiftName << ' '; OS << '\n'; diff --git a/clang/lib/APINotes/APINotesWriter.cpp b/clang/lib/APINotes/APINotesWriter.cpp index cf88d118d0979..47ed93a567c0e 100644 --- a/clang/lib/APINotes/APINotesWriter.cpp +++ b/clang/lib/APINotes/APINotesWriter.cpp @@ -507,6 +507,12 @@ void emitCommonEntityInfo(raw_ostream &OS, const CommonEntityInfo &CEI) { llvm::support::endian::Writer writer(OS, llvm::endianness::little); uint8_t payload = 0; + if (auto safety = CEI.getSwiftSafety()) { + payload = static_cast(*safety); + payload <<= 1; + payload |= 0x01; + } + payload <<= 2; if (auto swiftPrivate = CEI.isSwiftPrivate()) { payload |= 0x01; if (*swiftPrivate) diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp index a91a1eea03d81..8e91d48b4ba62 100644 --- a/clang/lib/APINotes/APINotesYAMLCompiler.cpp +++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp @@ -29,6 +29,18 @@ using namespace clang; using namespace api_notes; +namespace llvm { +namespace yaml { +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, SwiftSafetyKind &SK) { + IO.enumCase(SK, "unspecified", SwiftSafetyKind::Unspecified); + IO.enumCase(SK, "safe", SwiftSafetyKind::Safe); + IO.enumCase(SK, "unsafe", SwiftSafetyKind::Unsafe); + } +}; +} // namespace yaml +} // namespace llvm + namespace { enum class APIAvailability { Available = 0, @@ -163,6 +175,7 @@ struct Method { bool Required = false; StringRef ResultType; StringRef SwiftReturnOwnership; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector MethodsSeq; @@ -199,6 +212,7 @@ template <> struct 
MappingTraits { IO.mapOptional("ResultType", M.ResultType, StringRef("")); IO.mapOptional("SwiftReturnOwnership", M.SwiftReturnOwnership, StringRef("")); + IO.mapOptional("SwiftSafety", M.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -214,6 +228,7 @@ struct Property { StringRef SwiftName; std::optional SwiftImportAsAccessors; StringRef Type; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector PropertiesSeq; @@ -235,6 +250,7 @@ template <> struct MappingTraits { IO.mapOptional("SwiftName", P.SwiftName, StringRef("")); IO.mapOptional("SwiftImportAsAccessors", P.SwiftImportAsAccessors); IO.mapOptional("Type", P.Type, StringRef("")); + IO.mapOptional("SwiftSafety", P.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -254,6 +270,7 @@ struct Class { std::optional SwiftConformance; MethodsSeq Methods; PropertiesSeq Properties; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector ClassesSeq; @@ -279,6 +296,7 @@ template <> struct MappingTraits { IO.mapOptional("SwiftConformsTo", C.SwiftConformance); IO.mapOptional("Methods", C.Methods); IO.mapOptional("Properties", C.Properties); + IO.mapOptional("SwiftSafety", C.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -297,6 +315,7 @@ struct Function { StringRef Type; StringRef ResultType; StringRef SwiftReturnOwnership; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector FunctionsSeq; @@ -321,6 +340,7 @@ template <> struct MappingTraits { IO.mapOptional("ResultType", F.ResultType, StringRef("")); IO.mapOptional("SwiftReturnOwnership", F.SwiftReturnOwnership, StringRef("")); + IO.mapOptional("SwiftSafety", F.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -334,6 +354,7 @@ struct GlobalVariable { std::optional SwiftPrivate; StringRef SwiftName; StringRef Type; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector GlobalVariablesSeq; @@ -353,6 +374,7 @@ template <> struct 
MappingTraits { IO.mapOptional("SwiftPrivate", GV.SwiftPrivate); IO.mapOptional("SwiftName", GV.SwiftName, StringRef("")); IO.mapOptional("Type", GV.Type, StringRef("")); + IO.mapOptional("SwiftSafety", GV.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -364,6 +386,7 @@ struct EnumConstant { AvailabilityItem Availability; std::optional SwiftPrivate; StringRef SwiftName; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector EnumConstantsSeq; @@ -381,6 +404,7 @@ template <> struct MappingTraits { IO.mapOptional("AvailabilityMsg", EC.Availability.Msg, StringRef("")); IO.mapOptional("SwiftPrivate", EC.SwiftPrivate); IO.mapOptional("SwiftName", EC.SwiftName, StringRef("")); + IO.mapOptional("SwiftSafety", EC.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -424,6 +448,7 @@ struct Field { std::optional SwiftPrivate; StringRef SwiftName; StringRef Type; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector FieldsSeq; @@ -443,6 +468,7 @@ template <> struct MappingTraits { IO.mapOptional("SwiftPrivate", F.SwiftPrivate); IO.mapOptional("SwiftName", F.SwiftName, StringRef("")); IO.mapOptional("Type", F.Type, StringRef("")); + IO.mapOptional("SwiftSafety", F.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -470,6 +496,7 @@ struct Tag { std::optional EnumConvenienceKind; std::optional SwiftCopyable; std::optional SwiftEscapable; + SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; FunctionsSeq Methods; FieldsSeq Fields; @@ -515,6 +542,7 @@ template <> struct MappingTraits { IO.mapOptional("Methods", T.Methods); IO.mapOptional("Fields", T.Fields); IO.mapOptional("Tags", T.Tags); + IO.mapOptional("SwiftSafety", T.SafetyKind, SwiftSafetyKind::None); } }; } // namespace yaml @@ -530,6 +558,7 @@ struct Typedef { std::optional NSErrorDomain; std::optional SwiftType; std::optional SwiftConformance; + const SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; typedef std::vector TypedefsSeq; 
@@ -602,6 +631,7 @@ struct Namespace { StringRef SwiftName; std::optional SwiftPrivate; TopLevelItems Items; + const SwiftSafetyKind SafetyKind = SwiftSafetyKind::None; }; } // namespace @@ -797,6 +827,8 @@ class YAMLConverter { StringRef APIName) { convertAvailability(Common.Availability, Info, APIName); Info.setSwiftPrivate(Common.SwiftPrivate); + if (Common.SafetyKind != SwiftSafetyKind::None) + Info.setSwiftSafety(Common.SafetyKind); Info.SwiftName = std::string(Common.SwiftName); } @@ -956,6 +988,8 @@ class YAMLConverter { void convertFunction(const Function &Function, FuncOrMethodInfo &FI) { convertAvailability(Function.Availability, FI, Function.Name); FI.setSwiftPrivate(Function.SwiftPrivate); + if (Function.SafetyKind != SwiftSafetyKind::None) + FI.setSwiftSafety(Function.SafetyKind); FI.SwiftName = std::string(Function.SwiftName); std::optional This; convertParams(Function.Params, FI, This); diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 4cc1b76264340..99a29add8211d 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -13,6 +13,7 @@ #include "CheckExprLifetime.h" #include "TypeLocBuilder.h" #include "clang/APINotes/APINotesReader.h" +#include "clang/APINotes/Types.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ -291,6 +292,29 @@ static void ProcessAPINotes(Sema &S, Decl *D, }); } + // swift_safety + if (auto SafetyKind = Info.getSwiftSafety()) { + bool Addition = *SafetyKind != api_notes::SwiftSafetyKind::Unspecified; + handleAPINotedAttribute( + S, D, Addition, Metadata, + [&] { + return SwiftAttrAttr::Create( + S.Context, *SafetyKind == api_notes::SwiftSafetyKind::Safe + ? 
"safe" + : "unsafe"); + }, + [](const Decl *D) { + return llvm::find_if(D->attrs(), [](const Attr *attr) { + if (const auto *swiftAttr = dyn_cast(attr)) { + if (swiftAttr->getAttribute() == "safe" || + swiftAttr->getAttribute() == "unsafe") + return true; + } + return false; + }); + }); + } + // swift_name if (!Info.SwiftName.empty()) { handleAPINotedAttribute( diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes index 15c806842d08f..7e9cac32df3a5 100644 --- a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes +++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes @@ -35,6 +35,14 @@ Tags: - Name: NoncopyableWithDestroyType SwiftCopyable: false SwiftDestroyOp: NCDDestroy +- Name: ImportAsUnsafeStruct + SwiftSafety: unsafe +- Name: StructWithUnsafeMethod + Methods: + - Name: ImportAsUnsafeMethod + SwiftSafety: unsafe + - Name: ImportAsUnsafeMethodActuallySafe + SwiftSafety: safe Functions: - Name: functionReturningFrt__ @@ -42,7 +50,20 @@ Functions: SwiftReturnOwnership: unretained - Name: functionReturningFrt_returns_retained SwiftReturnOwnership: retained + - Name: ImportAsUnsafe + SwiftSafety: unsafe + - Name: ImportAsUnsafeAlreadyAnnotated + SwiftSafety: unspecified Typedefs: - Name: WrappedOptions SwiftWrapper: struct SwiftConformsTo: Swift.OptionSet +SwiftVersions: + - Version: 3.0 + Functions: + - Name: ImportAsUnsafeVersioned + SwiftSafety: unsafe + - Version: 6.0 + Functions: + - Name: ImportAsUnsafeVersioned + SwiftSafety: safe diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h index 978b4fbbb3b00..272e3865ab2ba 100644 --- a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h +++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h @@ -36,3 +36,14 @@ struct NoncopyableWithDestroyType { }; void NCDDestroy(NoncopyableWithDestroyType instance); + +void ImportAsUnsafe(); +struct ImportAsUnsafeStruct { +}; 
+struct StructWithUnsafeMethod { + void ImportAsUnsafeMethod(); + void ImportAsUnsafeMethodActuallySafe(); +}; + +void ImportAsUnsafeAlreadyAnnotated() __attribute__((swift_attr("unsafe"))); +void ImportAsUnsafeVersioned(); diff --git a/clang/test/APINotes/swift-import-as.cpp b/clang/test/APINotes/swift-import-as.cpp index f5d08df7c6a1b..20d38b5a0968d 100644 --- a/clang/test/APINotes/swift-import-as.cpp +++ b/clang/test/APINotes/swift-import-as.cpp @@ -16,6 +16,7 @@ // RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter methodReturningFrt_returns_retained | FileCheck -check-prefix=CHECK-METHOD-RETURNING-FRT-RETAINED %s // RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter WrappedOptions | FileCheck -check-prefix=CHECK-WRAPPED-OPTIONS %s // RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter NoncopyableWithDestroyType | FileCheck -check-prefix=CHECK-NONCOPYABLE-WITH-DESTROY %s +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter ImportAsUnsafe | FileCheck -check-prefix=CHECK-IMPORT-AS-UNSAFE %s #include @@ -103,3 +104,32 @@ // CHECK-NONCOPYABLE-WITH-DESTROY: RecordDecl {{.*}}struct NoncopyableWithDestroyType // CHECK-NONCOPYABLE-WITH-DESTROY: SwiftAttrAttr {{.+}} "destroy:NCDDestroy" // CHECK-NONCOPYABLE-WITH-DESTROY: SwiftAttrAttr {{.+}} "~Copyable" + +// CHECK-IMPORT-AS-UNSAFE: Dumping ImportAsUnsafe: +// CHECK-IMPORT-AS-UNSAFE: FunctionDecl {{.+}} ImportAsUnsafe +// CHECK-IMPORT-AS-UNSAFE: 
SwiftAttrAttr {{.+}} "unsafe" + +// CHECK-IMPORT-AS-UNSAFE: Dumping ImportAsUnsafeStruct: +// CHECK-IMPORT-AS-UNSAFE: CXXRecordDecl {{.+}} ImportAsUnsafeStruct +// CHECK-IMPORT-AS-UNSAFE: SwiftAttrAttr {{.+}} "unsafe" + +// CHECK-IMPORT-AS-UNSAFE: Dumping StructWithUnsafeMethod::ImportAsUnsafeMethod: +// CHECK-IMPORT-AS-UNSAFE: CXXMethodDecl {{.+}} ImportAsUnsafeMethod +// CHECK-IMPORT-AS-UNSAFE: SwiftAttrAttr {{.+}} "unsafe" + +// CHECK-IMPORT-AS-UNSAFE: Dumping StructWithUnsafeMethod::ImportAsUnsafeMethodActuallySafe: +// CHECK-IMPORT-AS-UNSAFE: CXXMethodDecl {{.+}} ImportAsUnsafeMethodActuallySafe +// CHECK-IMPORT-AS-UNSAFE: SwiftAttrAttr {{.+}} "safe" + +// CHECK-IMPORT-AS-UNSAFE: Dumping ImportAsUnsafeAlreadyAnnotated: +// CHECK-IMPORT-AS-UNSAFE: FunctionDecl {{.+}} ImportAsUnsafeAlreadyAnnotated +// CHECK-IMPORT-AS-UNSAFE: SwiftVersionedAdditionAttr {{.+}} IsReplacedByActive +// CHECK-IMPORT-AS-UNSAFE: SwiftAttrAttr {{.+}} "unsafe" +// CHECK-IMPORT-AS-UNSAFE-EMPTY: + +// CHECK-IMPORT-AS-UNSAFE: Dumping ImportAsUnsafeVersioned: +// CHECK-IMPORT-AS-UNSAFE: FunctionDecl {{.+}} ImportAsUnsafeVersioned +// CHECK-IMPORT-AS-UNSAFE: SwiftVersionedAdditionAttr {{.+}} 3.0 +// CHECK-IMPORT-AS-UNSAFE: SwiftAttrAttr {{.+}} "unsafe" +// CHECK-IMPORT-AS-UNSAFE: SwiftVersionedAdditionAttr {{.+}} 6.0 +// CHECK-IMPORT-AS-UNSAFE: SwiftAttrAttr {{.+}} "safe" From 148a83543b2fdacdeac71ff49d3f76e386cf6f91 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 15 Sep 2025 11:34:06 +0100 Subject: [PATCH 321/734] [LV] Introduce m_One and improve (0|1)-match (NFC) (#157419) --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 8 ++++++++ llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 14 ++++++-------- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 5 ++--- 6 files changed, 19 insertions(+), 14 deletions(-) diff 
--git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c04b5cb10eac2..640a98c622f80 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9557,7 +9557,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { auto ResumePhiIter = find_if(MainScalarPH->phis(), [VectorTC](VPRecipeBase &R) { return match(&R, m_VPInstruction(m_Specific(VectorTC), - m_SpecificInt(0))); + m_ZeroInt())); }); VPPhi *ResumePhi = nullptr; if (ResumePhiIter == MainScalarPH->phis().end()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 109156c1469c5..8b94378467706 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -149,12 +149,20 @@ struct is_zero_int { bool isValue(const APInt &C) const { return C.isZero(); } }; +struct is_one { + bool isValue(const APInt &C) const { return C.isOne(); } +}; + /// Match an integer 0 or a vector with all elements equal to 0. /// For vectors, this includes constants with undefined elements. inline int_pred_ty m_ZeroInt() { return int_pred_ty(); } +/// Match an integer 1 or a vector with all elements equal to 1. +/// For vectors, this includes constants with undefined elements. +inline int_pred_ty m_One() { return int_pred_ty(); } + /// Matching combinators template struct match_combine_or { LTy L; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 95e3196478176..723363fba5724 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -328,7 +328,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, // Pick out opcode, type/ext information and use sub side effects from a widen // recipe. 
auto HandleWiden = [&](VPWidenRecipe *Widen) { - if (match(Widen, m_Sub(m_SpecificInt(0), m_VPValue(Op)))) { + if (match(Widen, m_Sub(m_ZeroInt(), m_VPValue(Op)))) { Widen = dyn_cast(Op->getDefiningRecipe()); } Opcode = Widen->getOpcode(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a193c438e7ea8..503140213c116 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1134,10 +1134,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith( Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z))); - if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) + if (match(Def, m_c_Mul(m_VPValue(A), m_One()))) return Def->replaceAllUsesWith(A); - if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(0)))) + if (match(Def, m_c_Mul(m_VPValue(A), m_ZeroInt()))) return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1) : R.getOperand(0)); @@ -1176,16 +1176,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { } // Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0. 
- if ((match(Def, - m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) || - match(Def, - m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) && + if ((match(Def, m_DerivedIV(m_ZeroInt(), m_VPValue(A), m_One())) || + match(Def, m_DerivedIV(m_ZeroInt(), m_ZeroInt(), m_VPValue()))) && TypeInfo.inferScalarType(Def->getOperand(1)) == TypeInfo.inferScalarType(Def)) return Def->replaceAllUsesWith(Def->getOperand(1)); - if (match(Def, m_VPInstruction( - m_VPValue(X), m_SpecificInt(1)))) { + if (match(Def, m_VPInstruction(m_VPValue(X), + m_One()))) { Type *WideStepTy = TypeInfo.inferScalarType(Def); if (TypeInfo.inferScalarType(X) != WideStepTy) X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index ce5949485e63d..180b1b96b6364 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -238,7 +238,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, if (Part != 1) continue; VPValue *StartV; - if (match(VPI->getOperand(2), m_SpecificInt(1))) { + if (match(VPI->getOperand(2), m_One())) { StartV = VPI->getOperand(1); } else { auto *C = VPI->clone(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index c6c1ef3369825..ddc4ad1977401 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -65,10 +65,9 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) { VPValue *A, *B; using namespace VPlanPatternMatch; - if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_SpecificInt(1)))) + if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One()))) return B == Plan.getTripCount() && - (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), - m_SpecificInt(1), + (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(), 
m_Specific(&Plan.getVF()))) || IsWideCanonicalIV(A)); From 2848e2801260e4cdb7468523a7c9a7df1223749e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 15 Sep 2025 11:56:57 +0100 Subject: [PATCH 322/734] [LV] Add test with partial reduction without narrowing. --- .../AArch64/partial-reduce-chained.ll | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll index c444d5bcc82c7..c0995ec150c8d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll @@ -1381,6 +1381,131 @@ for.body: ; preds = %for.body.preheader, br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1 } +define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { +; CHECK-NEON-LABEL: define i32 @red_extended_add_chain( +; CHECK-NEON-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEON-NEXT: entry: +; CHECK-NEON-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEON-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEON-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1 +; CHECK-NEON-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] +; CHECK-NEON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEON-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEON: vector.ph: +; CHECK-NEON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16 +; CHECK-NEON-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] +; CHECK-NEON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[OFFSET]], i64 0 +; CHECK-NEON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> 
zeroinitializer +; CHECK-NEON-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-NEON: vector.body: +; CHECK-NEON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEON-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] +; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <16 x i32> @llvm.experimental.vector.partial.reduce.add.v16i32.v16i32(<16 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]]) +; CHECK-NEON-NEXT: [[TMP4]] = add <16 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] +; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEON-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEON-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEON: middle.block: +; CHECK-NEON-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]]) +; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEON: scalar.ph: +; +; CHECK-SVE-LABEL: define i32 @red_extended_add_chain( +; CHECK-SVE-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SVE-NEXT: entry: +; CHECK-SVE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-SVE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-SVE-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1 +; CHECK-SVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] +; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 +; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]] +; 
CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-SVE: vector.ph: +; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] +; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-SVE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] +; CHECK-SVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[OFFSET]], i64 0 +; CHECK-SVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-SVE: vector.body: +; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-SVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] +; CHECK-SVE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 1 +; CHECK-SVE-NEXT: [[TMP7:%.*]] = zext [[WIDE_LOAD]] to +; CHECK-SVE-NEXT: [[TMP8:%.*]] = add [[VEC_PHI]], [[TMP7]] +; CHECK-SVE-NEXT: [[TMP9]] = add [[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-SVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-SVE: middle.block: +; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP9]]) +; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-SVE: scalar.ph: +; +; CHECK-SVE-MAXBW-LABEL: define i32 @red_extended_add_chain( +; CHECK-SVE-MAXBW-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { +; 
CHECK-SVE-MAXBW-NEXT: entry: +; CHECK-SVE-MAXBW-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-SVE-MAXBW-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1 +; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] +; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 +; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]] +; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-SVE-MAXBW: vector.ph: +; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-SVE-MAXBW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] +; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[OFFSET]], i64 0 +; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-SVE-MAXBW: vector.body: +; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-SVE-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-SVE-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] +; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 1 +; CHECK-SVE-MAXBW-NEXT: [[TMP7:%.*]] = zext [[WIDE_LOAD]] to +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i32.nxv8i32( [[VEC_PHI]], [[TMP7]]) +; CHECK-SVE-MAXBW-NEXT: [[TMP8]] = add [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] +; 
CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-SVE-MAXBW: middle.block: +; CHECK-SVE-MAXBW-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32( [[TMP8]]) +; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-SVE-MAXBW: scalar.ph: +; +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] + %l = load i8, ptr %ptr.iv, align 1 + %l.ext = zext i8 %l to i32 + %add = add i32 %red, %l.ext + %red.next = add i32 %add, %offset + %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %ec = icmp eq ptr %ptr.iv, %end + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %red.next +} attributes #0 = { vscale_range(1,16) } From 641ed9f66fbc931f301dd123a08bcc2d3c83ee9d Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Sep 2025 19:02:09 +0800 Subject: [PATCH 323/734] [X86] Handle undef/zero/ones cases after modifying Ops and Masks (#158428) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes https://github.com/llvm/llvm-project/issues/158415. After `resolveTargetShuffleInputsAndMask` and other modifications on `Ops` and `Mask`, unused inputs in `Ops` are erased, and may leave `Ops` empty. 
This patch handles such cases before calling the final `combineX86ShuffleChain`。 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++ .../X86/vector-shuffle-combining-avx2.ll | 113 ++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eeb5eb8a262de..f81efdc6414aa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41567,6 +41567,17 @@ static SDValue combineX86ShufflesRecursively( resolveTargetShuffleInputsAndMask(Ops, Mask); } + // Handle the all undef/zero/ones cases. + if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) + return DAG.getUNDEF(RootVT); + if (all_of(Mask, [](int Idx) { return Idx < 0; })) + return getZeroVector(RootVT, Subtarget, DAG, DL); + if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) && + !llvm::is_contained(Mask, SM_SentinelZero)) + return getOnesVector(RootVT, DAG, DL); + + assert(!Ops.empty() && "Shuffle with no inputs detected"); + // We can only combine unary and binary shuffle mask cases. 
if (Ops.size() <= 2) { // Minor canonicalization of the accumulated shuffle mask to make it easier diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index f7764b1593b51..298858a8fcc73 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -1092,3 +1092,116 @@ define void @packss_zext_v8i1() { store <16 x i16> %tmp11, ptr undef, align 2 ret void } + +define <32 x i16> @PR158415(<8 x i8> %arg) { +; X86-AVX2-LABEL: PR158415: +; X86-AVX2: # %bb.0: # %entry +; X86-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4] +; X86-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] +; X86-AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; X86-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,24],zero,ymm0[25],zero,ymm0[30],zero,ymm0[31],zero,ymm0[u,u,u,u,u,u,u,u] +; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] +; X86-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] +; X86-AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[12,13,14,15],zero,zero,ymm1[4,5,u,u,u,u,u,u,u,u,28,29,30,31],zero,zero,ymm1[20,21],zero,zero,ymm1[26,27,28,29,30,31] +; X86-AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,0,2] +; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; X86-AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13] +; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpbroadcastw %xmm1, %ymm3 +; X86-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = 
ymm0[0,1],ymm3[2],ymm0[3,4,5,6,7,8,9],ymm3[10],ymm0[11,12,13,14,15] +; X86-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] +; X86-AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[14,15],zero,zero,zero,zero,xmm2[u,u],zero,zero +; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7] +; X86-AVX2-NEXT: retl +; +; X86-AVX512-LABEL: PR158415: +; X86-AVX512: # %bb.0: # %entry +; X86-AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4] +; X86-AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] +; X86-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; X86-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero +; X86-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; X86-AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; X86-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1 +; X86-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] +; X86-AVX512-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; X86-AVX512-NEXT: vpbroadcastd %xmm0, %ymm0 +; X86-AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; X86-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0 +; X86-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X86-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2 +; X86-AVX512-NEXT: vpsrld $16, %xmm2, %xmm2 +; X86-AVX512-NEXT: vpalignr {{.*#+}} xmm2 = 
xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] +; X86-AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,3] +; X86-AVX512-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] +; X86-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; X86-AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 +; X86-AVX512-NEXT: retl +; +; X64-AVX2-LABEL: PR158415: +; X64-AVX2: # %bb.0: # %entry +; X64-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4] +; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] +; X64-AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,24],zero,ymm0[25],zero,ymm0[30],zero,ymm0[31],zero,ymm0[u,u,u,u,u,u,u,u] +; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] +; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] +; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[12,13,14,15],zero,zero,ymm1[4,5,u,u,u,u,u,u,u,u,28,29,30,31],zero,zero,ymm1[20,21],zero,zero,ymm1[26,27,28,29,30,31] +; X64-AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,0,2] +; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; X64-AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13] +; X64-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpbroadcastw %xmm1, %ymm3 +; X64-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm3[2],ymm0[3,4,5,6,7,8,9],ymm3[10],ymm0[11,12,13,14,15] +; 
X64-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] +; X64-AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[14,15],zero,zero,zero,zero,xmm2[u,u],zero,zero +; X64-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; X64-AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7] +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: PR158415: +; X64-AVX512: # %bb.0: # %entry +; X64-AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4] +; X64-AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] +; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; X64-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero +; X64-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; X64-AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; X64-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1 +; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] +; X64-AVX512-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; X64-AVX512-NEXT: vpbroadcastd %xmm0, %ymm0 +; X64-AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 +; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X64-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2 +; X64-AVX512-NEXT: vpsrld $16, %xmm2, %xmm2 +; X64-AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] +; X64-AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,3] +; X64-AVX512-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] +; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; X64-AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; X64-AVX512-NEXT: retq +entry: + %shuffle2 = shufflevector <8 x i8> %arg, <8 x i8> zeroinitializer, <32 x i32> + %conv3 = zext <32 x i8> %shuffle2 to <32 x i16> + %shuffle4 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %conv3, <32 x i32> + %not = xor <32 x i16> %shuffle4, splat (i16 1) + %shuffle5 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %not, <32 x i32> + ret <32 x i16> %shuffle5 +} From 29b6433bfbd6b778d6a6686cac96ae4b7640224e Mon Sep 17 00:00:00 2001 From: Scott Manley Date: Mon, 15 Sep 2025 07:28:47 -0400 Subject: [PATCH 324/734] [OpenACC] verify acc::DeclareEnterOp operand not BlockArgument (#158095) Check that the operand of acc::DeclareEnterOp is a BlockArgument before trying to get its defining operation so it will not segfault and instead produce a clean error. Add test case. 
--- mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 3 ++- mlir/test/Dialect/OpenACC/invalid.mlir | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index ded4c7ab27274..b82ad20d8e194 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -3513,7 +3513,8 @@ checkDeclareOperands(Op &op, const mlir::ValueRange &operands, "at least one operand must appear on the declare operation"); for (mlir::Value operand : operands) { - if (!mlir::isa(operand) || + !mlir::isa( operand.getDefiningOp())) diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index 68afd9fccba79..24ce9784393b0 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -831,3 +831,12 @@ func.func @acc_loop_container() { %value = memref.alloc() : memref // expected-error @below {{invalid data clause modifiers: readonly}} %0 = acc.create varPtr(%value : memref) -> memref {modifiers = #acc} + +// ----- + +func.func @verify_declare_enter(%arg0 : memref) { +// expected-error @below {{expect valid declare data entry operation or acc.getdeviceptr as defining op}} + %0 = acc.declare_enter dataOperands(%arg0 : memref) + acc.declare_exit token(%0) dataOperands(%arg0 : memref) + return +} From b0de4e67775869a9e0a7c95236335084165e90ce Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 15 Sep 2025 13:25:59 +0100 Subject: [PATCH 325/734] [mlir][tosa] Add support for BF16 in `tosa.resize` legalization (#158616) Extend the resize linalg legalization functionality with BF16 support and in accordance with the TOSA specification. 
Signed-off-by: Georgios Pinitas --- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 4 +- .../TosaToLinalg/tosa-to-linalg-resize.mlir | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 0a6f2477560a1..1955eec9964eb 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1827,8 +1827,8 @@ class GenericResizeConverter : public OpRewritePattern { auto resultTy = cast(op.getType()); auto resultETy = resultTy.getElementType(); - bool floatingPointMode = resultETy.isF16() || resultETy.isF32(); - auto floatTy = resultETy.isF16() ? b.getF16Type() : b.getF32Type(); + bool floatingPointMode = isa(resultETy); + auto floatTy = resultETy; auto imageH = inputTy.getShape()[1]; auto imageW = inputTy.getShape()[2]; diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir index ff2cbbc0b3938..6998aee45b887 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir @@ -12,6 +12,18 @@ func.func @unary_resize_nearest_fp32(%arg0 : tensor<3x1x1x7xf32>) -> tensor<3x1x // ----- +// CHECK-LABEL: @unary_resize_nearest_bf16 +func.func @unary_resize_nearest_bf16(%arg0 : tensor<3x1x1x7xbf16>) -> tensor<3x1x1x7xbf16> { + %scale = tosa.const_shape { values = dense<[2, 2, 1, 1]> : tensor<4xindex> } : () -> !tosa.shape<4> + %offset = tosa.const_shape { values = dense<0> : tensor<2xindex> } : () -> !tosa.shape<2> + %border = tosa.const_shape { values = dense<0> : tensor<2xindex> } : () -> !tosa.shape<2> + %resize = tosa.resize %arg0, %scale, %offset, %border {mode = NEAREST_NEIGHBOR} : (tensor<3x1x1x7xbf16>, !tosa.shape<4>, !tosa.shape<2>, !tosa.shape<2>) -> tensor<3x1x1x7xbf16> + // CHECK: return %arg0 + return %resize : 
tensor<3x1x1x7xbf16> +} + +// ----- + // CHECK-LABEL: @unary_resize_nearest_fp16 func.func @unary_resize_nearest_fp16(%arg0 : tensor<3x1x1x7xf16>) -> tensor<3x1x1x7xf16> { %scale = tosa.const_shape { values = dense<[2, 2, 1, 1]> : tensor<4xindex> } : () -> !tosa.shape<4> @@ -36,6 +48,18 @@ func.func @unary_resize_bilinear_fp32(%arg0 : tensor<3x1x1x7xf32>) -> tensor<3x1 // ----- +// CHECK-LABEL: @unary_resize_bilinear_bf16 +func.func @unary_resize_bilinear_bf16(%arg0 : tensor<3x1x1x7xbf16>) -> tensor<3x1x1x7xbf16> { + %scale = tosa.const_shape { values = dense<[2, 2, 1, 1]> : tensor<4xindex> } : () -> !tosa.shape<4> + %offset = tosa.const_shape { values = dense<0> : tensor<2xindex> } : () -> !tosa.shape<2> + %border = tosa.const_shape { values = dense<0> : tensor<2xindex> } : () -> !tosa.shape<2> + %resize = tosa.resize %arg0, %scale, %offset, %border {mode = BILINEAR} : (tensor<3x1x1x7xbf16>, !tosa.shape<4>, !tosa.shape<2>, !tosa.shape<2>) -> tensor<3x1x1x7xbf16> + // CHECK: return %arg0 + return %resize : tensor<3x1x1x7xbf16> +} + +// ----- + // CHECK-LABEL: @unary_resize_bilinear_fp16 func.func @unary_resize_bilinear_fp16(%arg0 : tensor<3x1x1x7xf16>) -> tensor<3x1x1x7xf16> { %scale = tosa.const_shape { values = dense<[2, 2, 1, 1]> : tensor<4xindex> } : () -> !tosa.shape<4> @@ -60,6 +84,26 @@ func.func @unary_resize_nearest_i8(%arg0 : tensor<3x1x1x7xi8>) -> tensor<3x1x1x7 // ----- +// CHECK-LABEL: @broadcast_resize_nearest_bf16 +func.func @broadcast_resize_nearest_bf16(%arg0 : tensor<3x1x1x7xbf16>) -> tensor<3x1x5x7xbf16> { + // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0 + // CHECK-NEXT{literal}: [[0], [1, 2, 3]] : tensor<3x1x1x7xbf16> into tensor<3x7xbf16> + // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x1x5x7xbf16> + // CHECK: %[[GENERIC:.+]] = linalg.generic + // CHECK-SAME: indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xbf16>) outs(%[[EMPTY]] : 
tensor<3x1x5x7xbf16>) + // CHECK: ^bb0(%[[IN:.+]]: bf16, %[[OUT:.+]]: bf16): + // CHECK: linalg.yield %[[IN]] : bf16 + %scale = tosa.const_shape { values = dense<[2, 1, 3, 1]> : tensor<4xindex> } : () -> !tosa.shape<4> + %offset = tosa.const_shape { values = dense<0> : tensor<2xindex> } : () -> !tosa.shape<2> + %border = tosa.const_shape { values = dense<0> : tensor<2xindex> } : () -> !tosa.shape<2> + %resize = tosa.resize %arg0, %scale, %offset, %border {mode = NEAREST_NEIGHBOR} : (tensor<3x1x1x7xbf16>, !tosa.shape<4>, !tosa.shape<2>, !tosa.shape<2>) -> tensor<3x1x5x7xbf16> + + return %resize : tensor<3x1x5x7xbf16> +} + +// ----- + // CHECK-LABEL: @broadcast_resize_nearest_f32 func.func @broadcast_resize_nearest_f32(%arg0 : tensor<3x1x1x7xf32>) -> tensor<3x1x5x7xf32> { // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0 From 471bd1745ef044a7ee58a4947bf06a7f56660502 Mon Sep 17 00:00:00 2001 From: Amir Bishara <139038766+amirBish@users.noreply.github.com> Date: Mon, 15 Sep 2025 15:29:54 +0300 Subject: [PATCH 326/734] [mlir][func]-Add deduplicate funcOp arguments transform (#158266) This PR adds a new transform operation which removes the duplicate arguments from the function operation based on the callOp of this function. To have a simpler implementation for now, the transform will fail when having multiple callOps for the same function we want to eliminate the different arguments from. This pull request also adapts the utils under the func dialect to be reusable also for this transformOp. 
--- .../Func/TransformOps/FuncTransformOps.td | 26 ++ mlir/include/mlir/Dialect/Func/Utils/Utils.h | 41 +++- .../Func/TransformOps/FuncTransformOps.cpp | 64 ++++- mlir/lib/Dialect/Func/Utils/Utils.cpp | 232 ++++++++++++++---- .../Dialect/Func/func-transform-invalid.mlir | 89 +++++++ mlir/test/Dialect/Func/func-transform.mlir | 62 +++++ 6 files changed, 447 insertions(+), 67 deletions(-) diff --git a/mlir/include/mlir/Dialect/Func/TransformOps/FuncTransformOps.td b/mlir/include/mlir/Dialect/Func/TransformOps/FuncTransformOps.td index 4062f310c6521..b64b3fcdb275b 100644 --- a/mlir/include/mlir/Dialect/Func/TransformOps/FuncTransformOps.td +++ b/mlir/include/mlir/Dialect/Func/TransformOps/FuncTransformOps.td @@ -134,4 +134,30 @@ def ReplaceFuncSignatureOp }]; } +def DeduplicateFuncArgsOp + : Op, + DeclareOpInterfaceMethods]> { + let description = [{ + This transform takes a module and a function name, and deduplicates + the arguments of the function. The function is expected to be defined in + the module. + + This transform will emit a silenceable failure if: + - The function with the given name does not exist in the module. + - The function does not have duplicate arguments. + - The function does not have a single call. 
+ }]; + + let arguments = (ins TransformHandleTypeInterface:$module, + SymbolRefAttr:$function_name); + let results = (outs TransformHandleTypeInterface:$transformed_module, + TransformHandleTypeInterface:$transformed_function); + + let assemblyFormat = [{ + $function_name + `at` $module attr-dict `:` functional-type(operands, results) + }]; +} + #endif // FUNC_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/Func/Utils/Utils.h b/mlir/include/mlir/Dialect/Func/Utils/Utils.h index 2e8b6723a0e53..3576126a487ac 100644 --- a/mlir/include/mlir/Dialect/Func/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Func/Utils/Utils.h @@ -18,32 +18,49 @@ #include "mlir/IR/PatternMatch.h" #include "llvm/ADT/ArrayRef.h" +#include namespace mlir { +class ModuleOp; + namespace func { class FuncOp; class CallOp; /// Creates a new function operation with the same name as the original -/// function operation, but with the arguments reordered according to -/// the `newArgsOrder` and `newResultsOrder`. +/// function operation, but with the arguments mapped according to +/// the `oldArgToNewArg` and `oldResToNewRes`. /// The `funcOp` operation must have exactly one block. /// Returns the new function operation or failure if `funcOp` doesn't /// have exactly one block. -FailureOr -replaceFuncWithNewOrder(RewriterBase &rewriter, FuncOp funcOp, - llvm::ArrayRef newArgsOrder, - llvm::ArrayRef newResultsOrder); +/// Note: the method asserts that the `oldArgToNewArg` and `oldResToNewRes` +/// maps the whole function arguments and results. +mlir::FailureOr replaceFuncWithNewMapping( + mlir::RewriterBase &rewriter, mlir::func::FuncOp funcOp, + ArrayRef oldArgIdxToNewArgIdx, ArrayRef oldResIdxToNewResIdx); /// Creates a new call operation with the values as the original -/// call operation, but with the arguments reordered according to -/// the `newArgsOrder` and `newResultsOrder`. 
-CallOp replaceCallOpWithNewOrder(RewriterBase &rewriter, CallOp callOp, - llvm::ArrayRef newArgsOrder, - llvm::ArrayRef newResultsOrder); +/// call operation, but with the arguments mapped according to +/// the `oldArgToNewArg` and `oldResToNewRes`. +/// Note: the method asserts that the `oldArgToNewArg` and `oldResToNewRes` +/// maps the whole call operation arguments and results. +mlir::func::CallOp replaceCallOpWithNewMapping( + mlir::RewriterBase &rewriter, mlir::func::CallOp callOp, + ArrayRef oldArgIdxToNewArgIdx, ArrayRef oldResIdxToNewResIdx); + +/// This utility function examines all call operations within the given +/// `moduleOp` that target the specified `funcOp`. It identifies duplicate +/// operands in the call operations, creates mappings to deduplicate them, and +/// then applies the transformation to both the function and its call sites. For +/// now, it only supports one call operation for the function operation. The +/// function returns a pair containing the new funcOp and the new callOp. Note: +/// after the transformation, the original funcOp and callOp will be erased. 
+mlir::FailureOr> +deduplicateArgsOfFuncOp(mlir::RewriterBase &rewriter, mlir::func::FuncOp funcOp, + mlir::ModuleOp moduleOp); } // namespace func } // namespace mlir -#endif // MLIR_DIALECT_FUNC_UTILS_H +#endif // MLIR_DIALECT_FUNC_UTILS_H \ No newline at end of file diff --git a/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp b/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp index 935d3e5ac331b..3a42d2a367d70 100644 --- a/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp +++ b/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/STLExtras.h" using namespace mlir; @@ -296,9 +297,16 @@ transform::ReplaceFuncSignatureOp::apply(transform::TransformRewriter &rewriter, } } - FailureOr newFuncOpOrFailure = func::replaceFuncWithNewOrder( - rewriter, funcOp, argsInterchange.getArrayRef(), - resultsInterchange.getArrayRef()); + llvm::SmallVector oldArgToNewArg(argsInterchange.size()); + for (auto [newArgIdx, oldArgIdx] : llvm::enumerate(argsInterchange)) + oldArgToNewArg[oldArgIdx] = newArgIdx; + + llvm::SmallVector oldResToNewRes(resultsInterchange.size()); + for (auto [newResIdx, oldResIdx] : llvm::enumerate(resultsInterchange)) + oldResToNewRes[oldResIdx] = newResIdx; + + FailureOr newFuncOpOrFailure = func::replaceFuncWithNewMapping( + rewriter, funcOp, oldArgToNewArg, oldResToNewRes); if (failed(newFuncOpOrFailure)) return emitSilenceableFailure(getLoc()) << "failed to replace function signature '" << getFunctionName() @@ -312,9 +320,8 @@ transform::ReplaceFuncSignatureOp::apply(transform::TransformRewriter &rewriter, }); for (func::CallOp callOp : callOps) - func::replaceCallOpWithNewOrder(rewriter, callOp, - argsInterchange.getArrayRef(), - resultsInterchange.getArrayRef()); + func::replaceCallOpWithNewMapping(rewriter, callOp, oldArgToNewArg, + 
oldResToNewRes); } results.set(cast(getTransformedModule()), {targetModuleOp}); @@ -330,6 +337,51 @@ void transform::ReplaceFuncSignatureOp::getEffects( transform::modifiesPayload(effects); } +//===----------------------------------------------------------------------===// +// DeduplicateFuncArgsOp +//===----------------------------------------------------------------------===// + +DiagnosedSilenceableFailure +transform::DeduplicateFuncArgsOp::apply(transform::TransformRewriter &rewriter, + transform::TransformResults &results, + transform::TransformState &state) { + auto payloadOps = state.getPayloadOps(getModule()); + if (!llvm::hasSingleElement(payloadOps)) + return emitDefiniteFailure() << "requires a single module to operate on"; + + auto targetModuleOp = dyn_cast(*payloadOps.begin()); + if (!targetModuleOp) + return emitSilenceableFailure(getLoc()) + << "target is expected to be module operation"; + + func::FuncOp funcOp = + targetModuleOp.lookupSymbol(getFunctionName()); + if (!funcOp) + return emitSilenceableFailure(getLoc()) + << "function with name '" << getFunctionName() << "' is not found"; + + auto transformationResult = + func::deduplicateArgsOfFuncOp(rewriter, funcOp, targetModuleOp); + if (failed(transformationResult)) + return emitSilenceableFailure(getLoc()) + << "failed to deduplicate function arguments of function " + << funcOp.getName(); + + auto [newFuncOp, newCallOp] = *transformationResult; + + results.set(cast(getTransformedModule()), {targetModuleOp}); + results.set(cast(getTransformedFunction()), {newFuncOp}); + + return DiagnosedSilenceableFailure::success(); +} + +void transform::DeduplicateFuncArgsOp::getEffects( + SmallVectorImpl &effects) { + transform::consumesHandle(getModuleMutable(), effects); + transform::producesHandle(getOperation()->getOpResults(), effects); + transform::modifiesPayload(effects); +} + //===----------------------------------------------------------------------===// // Transform op registration 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Func/Utils/Utils.cpp b/mlir/lib/Dialect/Func/Utils/Utils.cpp index f781ed2d591b4..b4cb0932ef631 100644 --- a/mlir/lib/Dialect/Func/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Func/Utils/Utils.cpp @@ -14,35 +14,101 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DebugLog.h" + +#define DEBUG_TYPE "func-utils" using namespace mlir; +/// This method creates an inverse mapping of the provided map `oldToNew`. +/// Given an array where `oldIdxToNewIdx[i] = j` means old index `i` maps +/// to new index `j`, +/// This method returns a vector where `result[j]` contains all old indices +/// that map to new index `j`. +/// +/// Example: +/// ``` +/// oldIdxToNewIdx = [0, 1, 2, 2, 3] +/// getInverseMapping(oldIdxToNewIdx) = [[0], [1], [2, 3], [4]] +/// ``` +/// +static llvm::SmallVector> +getInverseMapping(ArrayRef oldIdxToNewIdx) { + int numOfNewIdxs = 0; + if (!oldIdxToNewIdx.empty()) + numOfNewIdxs = 1 + *llvm::max_element(oldIdxToNewIdx); + llvm::SmallVector> newToOldIdxs(numOfNewIdxs); + for (auto [oldIdx, newIdx] : llvm::enumerate(oldIdxToNewIdx)) + newToOldIdxs[newIdx].push_back(oldIdx); + return newToOldIdxs; +} + +/// This method returns a new vector of elements that are mapped from the +/// `origElements` based on the `newIdxToOldIdxs` mapping. This function assumes +/// that the `newIdxToOldIdxs` mapping is valid, i.e. for each new index, there +/// is at least one old index that maps to it. Also, It assumes that mapping to +/// the same old index has the same element in the `origElements` vector. 
+template +static SmallVector getMappedElements( + ArrayRef origElements, + const llvm::SmallVector> &newIdxToOldIdxs) { + SmallVector newElements; + for (const auto &oldIdxs : newIdxToOldIdxs) { + assert(llvm::all_of(oldIdxs, + [&origElements](int idx) -> bool { + return idx >= 0 && + static_cast(idx) < origElements.size(); + }) && + "idx must be less than the number of elements in the original " + "elements"); + assert(!oldIdxs.empty() && "oldIdx must not be empty"); + Element origTypeToCheck = origElements[oldIdxs.front()]; + assert(llvm::all_of(oldIdxs, + [&](int idx) -> bool { + return origElements[idx] == origTypeToCheck; + }) && + "all oldIdxs must be equal"); + newElements.push_back(origTypeToCheck); + } + return newElements; +} + FailureOr -func::replaceFuncWithNewOrder(RewriterBase &rewriter, func::FuncOp funcOp, - ArrayRef newArgsOrder, - ArrayRef newResultsOrder) { +func::replaceFuncWithNewMapping(RewriterBase &rewriter, func::FuncOp funcOp, + ArrayRef oldArgIdxToNewArgIdx, + ArrayRef oldResIdxToNewResIdx) { // Generate an empty new function operation with the same name as the // original. 
- assert(funcOp.getNumArguments() == newArgsOrder.size() && - "newArgsOrder must match the number of arguments in the function"); - assert(funcOp.getNumResults() == newResultsOrder.size() && - "newResultsOrder must match the number of results in the function"); + assert(funcOp.getNumArguments() == oldArgIdxToNewArgIdx.size() && + "oldArgIdxToNewArgIdx must match the number of arguments in the " + "function"); + assert( + funcOp.getNumResults() == oldResIdxToNewResIdx.size() && + "oldResIdxToNewResIdx must match the number of results in the function"); if (!funcOp.getBody().hasOneBlock()) return rewriter.notifyMatchFailure( funcOp, "expected function to have exactly one block"); - ArrayRef origInputTypes = funcOp.getFunctionType().getInputs(); - ArrayRef origOutputTypes = funcOp.getFunctionType().getResults(); - SmallVector newInputTypes, newOutputTypes; + // We may have some duplicate arguments in the old function, i.e. + // in the mapping `newArgIdxToOldArgIdxs` for some new argument index + // there may be multiple old argument indices. 
+ llvm::SmallVector> newArgIdxToOldArgIdxs = + getInverseMapping(oldArgIdxToNewArgIdx); + SmallVector newInputTypes = getMappedElements( + funcOp.getFunctionType().getInputs(), newArgIdxToOldArgIdxs); + SmallVector locs; - for (unsigned int idx : newArgsOrder) { - newInputTypes.push_back(origInputTypes[idx]); - locs.push_back(funcOp.getArgument(newArgsOrder[idx]).getLoc()); - } - for (unsigned int idx : newResultsOrder) - newOutputTypes.push_back(origOutputTypes[idx]); + for (const auto &oldArgIdxs : newArgIdxToOldArgIdxs) + locs.push_back(funcOp.getArgument(oldArgIdxs.front()).getLoc()); + + llvm::SmallVector> newResToOldResIdxs = + getInverseMapping(oldResIdxToNewResIdx); + SmallVector newOutputTypes = getMappedElements( + funcOp.getFunctionType().getResults(), newResToOldResIdxs); + rewriter.setInsertionPoint(funcOp); auto newFuncOp = func::FuncOp::create( rewriter, funcOp.getLoc(), funcOp.getName(), @@ -51,21 +117,21 @@ func::replaceFuncWithNewOrder(RewriterBase &rewriter, func::FuncOp funcOp, Region &newRegion = newFuncOp.getBody(); rewriter.createBlock(&newRegion, newRegion.begin(), newInputTypes, locs); newFuncOp.setVisibility(funcOp.getVisibility()); - newFuncOp->setDiscardableAttrs(funcOp->getDiscardableAttrDictionary()); // Map the arguments of the original function to the new function in // the new order and adjust the attributes accordingly. 
IRMapping operandMapper; SmallVector argAttrs, resultAttrs; funcOp.getAllArgAttrs(argAttrs); - for (unsigned int i = 0; i < newArgsOrder.size(); ++i) { - operandMapper.map(funcOp.getArgument(newArgsOrder[i]), - newFuncOp.getArgument(i)); - newFuncOp.setArgAttrs(i, argAttrs[newArgsOrder[i]]); - } + for (auto [oldArgIdx, newArgIdx] : llvm::enumerate(oldArgIdxToNewArgIdx)) + operandMapper.map(funcOp.getArgument(oldArgIdx), + newFuncOp.getArgument(newArgIdx)); + for (auto [newArgIdx, oldArgIdx] : llvm::enumerate(newArgIdxToOldArgIdxs)) + newFuncOp.setArgAttrs(newArgIdx, argAttrs[oldArgIdx.front()]); + funcOp.getAllResultAttrs(resultAttrs); - for (unsigned int i = 0; i < newResultsOrder.size(); ++i) - newFuncOp.setResultAttrs(i, resultAttrs[newResultsOrder[i]]); + for (auto [newResIdx, oldResIdx] : llvm::enumerate(newResToOldResIdxs)) + newFuncOp.setResultAttrs(newResIdx, resultAttrs[oldResIdx.front()]); // Clone the operations from the original function to the new function. rewriter.setInsertionPointToStart(&newFuncOp.getBody().front()); @@ -76,12 +142,11 @@ func::replaceFuncWithNewOrder(RewriterBase &rewriter, func::FuncOp funcOp, auto returnOp = cast( newFuncOp.getFunctionBody().begin()->getTerminator()); SmallVector newReturnValues; - for (unsigned int idx : newResultsOrder) - newReturnValues.push_back(returnOp.getOperand(idx)); + for (const auto &oldResIdxs : newResToOldResIdxs) + newReturnValues.push_back(returnOp.getOperand(oldResIdxs.front())); + rewriter.setInsertionPoint(returnOp); - auto newReturnOp = - func::ReturnOp::create(rewriter, newFuncOp.getLoc(), newReturnValues); - newReturnOp->setDiscardableAttrs(returnOp->getDiscardableAttrDictionary()); + func::ReturnOp::create(rewriter, newFuncOp.getLoc(), newReturnValues); rewriter.eraseOp(returnOp); rewriter.eraseOp(funcOp); @@ -90,33 +155,102 @@ func::replaceFuncWithNewOrder(RewriterBase &rewriter, func::FuncOp funcOp, } func::CallOp -func::replaceCallOpWithNewOrder(RewriterBase &rewriter, func::CallOp 
callOp, - ArrayRef newArgsOrder, - ArrayRef newResultsOrder) { - assert( - callOp.getNumOperands() == newArgsOrder.size() && - "newArgsOrder must match the number of operands in the call operation"); - assert( - callOp.getNumResults() == newResultsOrder.size() && - "newResultsOrder must match the number of results in the call operation"); - SmallVector newArgsOrderValues; - for (unsigned int argIdx : newArgsOrder) - newArgsOrderValues.push_back(callOp.getOperand(argIdx)); - SmallVector newResultTypes; - for (unsigned int resIdx : newResultsOrder) - newResultTypes.push_back(callOp.getResult(resIdx).getType()); +func::replaceCallOpWithNewMapping(RewriterBase &rewriter, func::CallOp callOp, + ArrayRef oldArgIdxToNewArgIdx, + ArrayRef oldResIdxToNewResIdx) { + assert(callOp.getNumOperands() == oldArgIdxToNewArgIdx.size() && + "oldArgIdxToNewArgIdx must match the number of operands in the call " + "operation"); + assert(callOp.getNumResults() == oldResIdxToNewResIdx.size() && + "oldResIdxToNewResIdx must match the number of results in the call " + "operation"); + + SmallVector origOperands = callOp.getOperands(); + SmallVector> newArgIdxToOldArgIdxs = + getInverseMapping(oldArgIdxToNewArgIdx); + SmallVector newOperandsValues = + getMappedElements(origOperands, newArgIdxToOldArgIdxs); + SmallVector> newResToOldResIdxs = + getInverseMapping(oldResIdxToNewResIdx); + SmallVector origResultTypes = llvm::to_vector(callOp.getResultTypes()); + SmallVector newResultTypes = + getMappedElements(origResultTypes, newResToOldResIdxs); // Replace the kernel call operation with a new one that has the - // reordered arguments. + // mapped arguments. 
rewriter.setInsertionPoint(callOp); auto newCallOp = func::CallOp::create(rewriter, callOp.getLoc(), callOp.getCallee(), - newResultTypes, newArgsOrderValues); + newResultTypes, newOperandsValues); newCallOp.setNoInlineAttr(callOp.getNoInlineAttr()); - for (auto &&[newIndex, origIndex] : llvm::enumerate(newResultsOrder)) - rewriter.replaceAllUsesWith(callOp.getResult(origIndex), - newCallOp.getResult(newIndex)); + for (auto &&[oldResIdx, newResIdx] : llvm::enumerate(oldResIdxToNewResIdx)) + rewriter.replaceAllUsesWith(callOp.getResult(oldResIdx), + newCallOp.getResult(newResIdx)); rewriter.eraseOp(callOp); return newCallOp; } + +FailureOr> +func::deduplicateArgsOfFuncOp(RewriterBase &rewriter, func::FuncOp funcOp, + ModuleOp moduleOp) { + SmallVector callOps; + auto traversalResult = moduleOp.walk([&](func::CallOp callOp) { + if (callOp.getCallee() == funcOp.getSymName()) { + if (!callOps.empty()) + // Only support one callOp for now + return WalkResult::interrupt(); + callOps.push_back(callOp); + } + return WalkResult::advance(); + }); + + if (traversalResult.wasInterrupted()) { + LDBG() << "function " << funcOp.getName() << " has more than one callOp"; + return failure(); + } + + if (callOps.empty()) { + LDBG() << "function " << funcOp.getName() << " does not have any callOp"; + return failure(); + } + + func::CallOp callOp = callOps.front(); + + // Create mapping for arguments (deduplicate operands) + SmallVector oldArgIdxToNewArgIdx(callOp.getNumOperands()); + llvm::DenseMap valueToNewArgIdx; + for (auto [operandIdx, operand] : llvm::enumerate(callOp.getOperands())) { + auto [iterator, inserted] = valueToNewArgIdx.insert( + {operand, static_cast(valueToNewArgIdx.size())}); + // Reduce the duplicate operands and maintain the original order. 
+ oldArgIdxToNewArgIdx[operandIdx] = iterator->second; + } + + bool hasDuplicateOperands = + valueToNewArgIdx.size() != callOp.getNumOperands(); + if (!hasDuplicateOperands) { + LDBG() << "function " << funcOp.getName() + << " does not have duplicate operands"; + return failure(); + } + + // Create identity mapping for results (no deduplication needed) + SmallVector oldResIdxToNewResIdx(callOp.getNumResults()); + for (int resultIdx : llvm::seq(0, callOp.getNumResults())) + oldResIdxToNewResIdx[resultIdx] = resultIdx; + + // Apply the transformation to create new function and call operations + FailureOr newFuncOpOrFailure = replaceFuncWithNewMapping( + rewriter, funcOp, oldArgIdxToNewArgIdx, oldResIdxToNewResIdx); + if (failed(newFuncOpOrFailure)) { + LDBG() << "failed to replace function signature with name " + << funcOp.getName() << " with new order"; + return failure(); + } + + func::CallOp newCallOp = replaceCallOpWithNewMapping( + rewriter, callOp, oldArgIdxToNewArgIdx, oldResIdxToNewResIdx); + + return std::make_pair(*newFuncOpOrFailure, newCallOp); +} diff --git a/mlir/test/Dialect/Func/func-transform-invalid.mlir b/mlir/test/Dialect/Func/func-transform-invalid.mlir index e712eee83f36e..29bd58ab52742 100644 --- a/mlir/test/Dialect/Func/func-transform-invalid.mlir +++ b/mlir/test/Dialect/Func/func-transform-invalid.mlir @@ -85,3 +85,92 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +func.func private @func_with_no_duplicate_args(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>, %arg2: memref<3xi8, 1>) { + %c0 = arith.constant 0 : index + %view = memref.view %arg0[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + %view0 = memref.view %arg1[%c0][] : memref<2xi8, 1> to memref<2xi8, 1> + %view1 = memref.view %arg2[%c0][] : memref<3xi8, 1> to memref<3xi8, 1> + return +} + +func.func @func_with_no_duplicate_args_caller(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>, %arg2: memref<3xi8, 1>) { + call 
@func_with_no_duplicate_args(%arg0, %arg1, %arg2) : (memref<1xi8, 1>, memref<2xi8, 1>, memref<3xi8, 1>) -> () + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module: !transform.any_op) { + // expected-error @+1 {{failed to deduplicate function arguments of function func_with_no_duplicate_args}} + transform.func.deduplicate_func_args @func_with_no_duplicate_args at %module : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +func.func private @func_not_found(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>, %arg2: memref<3xi8, 1>) { + %c0 = arith.constant 0 : index + %view = memref.view %arg0[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + %view0 = memref.view %arg1[%c0][] : memref<2xi8, 1> to memref<2xi8, 1> + %view1 = memref.view %arg2[%c0][] : memref<3xi8, 1> to memref<3xi8, 1> + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module: !transform.any_op) { + // expected-error @+1 {{function with name '@non_existent_func' is not found}} + transform.func.deduplicate_func_args @non_existent_func at %module : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +func.func private @func_with_multiple_calls(%arg0: memref<1xi8, 1>, %arg1: memref<1xi8, 1>) { + %c0 = arith.constant 0 : index + %view = memref.view %arg0[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + %view0 = memref.view %arg1[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + return +} + +func.func @func_with_multiple_calls_caller1(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>) { + call @func_with_multiple_calls(%arg0, %arg0) : (memref<1xi8, 1>, memref<1xi8, 1>) -> () + return +} + +func.func @func_with_multiple_calls_caller2(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>) { + call @func_with_multiple_calls(%arg0, %arg0) : (memref<1xi8, 1>, memref<1xi8, 1>) -> () + return +} + +module 
attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module: !transform.any_op) { + // expected-error @+1 {{failed to deduplicate function arguments of function func_with_multiple_calls}} + transform.func.deduplicate_func_args @func_with_multiple_calls at %module : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +func.func private @func_with_no_calls(%arg0: memref<1xi8, 1>, %arg1: memref<1xi8, 1>) { + %c0 = arith.constant 0 : index + %view = memref.view %arg0[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + %view0 = memref.view %arg1[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + return +} + +func.func @some_other_func() { + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module: !transform.any_op) { + // expected-error @+1 {{failed to deduplicate function arguments of function func_with_no_calls}} + transform.func.deduplicate_func_args @func_with_no_calls at %module : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} diff --git a/mlir/test/Dialect/Func/func-transform.mlir b/mlir/test/Dialect/Func/func-transform.mlir index 36a66aaa95bfb..8a71511e3ed5b 100644 --- a/mlir/test/Dialect/Func/func-transform.mlir +++ b/mlir/test/Dialect/Func/func-transform.mlir @@ -250,3 +250,65 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +// CHECK: func.func private @func_with_duplicate_args(%[[ARG0:.*]]: memref<1xi8, 1>, %[[ARG1:.*]]: memref<2xi8, 1>) { +func.func private @func_with_duplicate_args(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>, %arg2: memref<1xi8, 1>) { + %c0 = arith.constant 0 : index + // CHECK: %[[VAL_3:.*]] = memref.view %[[ARG0]]{{\[}}%[[C0:.*]]][] : memref<1xi8, 1> to memref<1xi8, 1> + %view = memref.view %arg0[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + // CHECK: %[[VAL_4:.*]] = memref.view %[[ARG1]]{{\[}}%[[C0]]][] : memref<2xi8, 1> 
to memref<2xi8, 1> + %view0 = memref.view %arg1[%c0][] : memref<2xi8, 1> to memref<2xi8, 1> + // CHECK: %[[VAL_5:.*]] = memref.view %[[ARG0]]{{\[}}%[[C0]]][] : memref<1xi8, 1> to memref<1xi8, 1> + %view1 = memref.view %arg2[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + return +} + +// CHECK: func.func @func_with_duplicate_args_caller(%[[ARG0:.*]]: memref<1xi8, 1>, %[[ARG1:.*]]: memref<2xi8, 1>) { +func.func @func_with_duplicate_args_caller(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>) { + // CHECK: call @func_with_duplicate_args(%[[ARG0]], %[[ARG1]]) : (memref<1xi8, 1>, memref<2xi8, 1>) -> () + call @func_with_duplicate_args(%arg0, %arg1, %arg0) : (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>) -> () + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module: !transform.any_op) { + transform.func.deduplicate_func_args @func_with_duplicate_args at %module : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK: func.func private @func_with_complex_duplicate_args(%[[ARG0:.*]]: memref<1xi8, 1>, %[[ARG1:.*]]: memref<2xi8, 1>, %[[ARG2:.*]]: memref<3xi8, 1>) -> (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1>) { +func.func private @func_with_complex_duplicate_args(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>, %arg2: memref<1xi8, 1>, %arg3: memref<3xi8, 1>, %arg4: memref<2xi8, 1>) -> (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1>) { + %c0 = arith.constant 0 : index + // CHECK: %[[RET_0:.*]] = memref.view %[[ARG0]]{{\[}}%[[C0:.*]]][] : memref<1xi8, 1> to memref<1xi8, 1> + %view0 = memref.view %arg0[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + // CHECK: %[[RET_1:.*]] = memref.view %[[ARG1]]{{\[}}%[[C0]]][] : memref<2xi8, 1> to memref<2xi8, 1> + %view1 = memref.view %arg1[%c0][] : memref<2xi8, 1> to memref<2xi8, 1> + // CHECK: %[[RET_2:.*]] = memref.view 
%[[ARG0]]{{\[}}%[[C0]]][] : memref<1xi8, 1> to memref<1xi8, 1> + %view2 = memref.view %arg2[%c0][] : memref<1xi8, 1> to memref<1xi8, 1> + // CHECK: %[[RET_3:.*]] = memref.view %[[ARG2]]{{\[}}%[[C0]]][] : memref<3xi8, 1> to memref<3xi8, 1> + %view3 = memref.view %arg3[%c0][] : memref<3xi8, 1> to memref<3xi8, 1> + // CHECK: %[[RET_4:.*]] = memref.view %[[ARG1]]{{\[}}%[[C0]]][] : memref<2xi8, 1> to memref<2xi8, 1> + %view4 = memref.view %arg4[%c0][] : memref<2xi8, 1> to memref<2xi8, 1> + // CHECK: return %[[RET_0]], %[[RET_1]], %[[RET_2]], %[[RET_3]], %[[RET_4]] : memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1> + return %view0, %view1, %view2, %view3, %view4 : memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1> +} + +// CHECK: func.func @func_with_complex_duplicate_args_caller(%[[ARG0:.*]]: memref<1xi8, 1>, %[[ARG1:.*]]: memref<2xi8, 1>, %[[ARG2:.*]]: memref<3xi8, 1>) -> (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1>) { +func.func @func_with_complex_duplicate_args_caller(%arg0: memref<1xi8, 1>, %arg1: memref<2xi8, 1>, %arg2: memref<3xi8, 1>) -> (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1>) { + // CHECK: %[[RET:.*]]:5 = call @func_with_complex_duplicate_args(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (memref<1xi8, 1>, memref<2xi8, 1>, memref<3xi8, 1>) -> (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1>) + %0:5 = call @func_with_complex_duplicate_args(%arg0, %arg1, %arg0, %arg2, %arg1) : (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1>) -> (memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1>) + // CHECK: return %[[RET]]#0, %[[RET]]#1, %[[RET]]#2, %[[RET]]#3, %[[RET]]#4 : memref<1xi8, 1>, memref<2xi8, 1>, memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1> + return %0#0, %0#1, %0#2, %0#3, %0#4 : memref<1xi8, 1>, memref<2xi8, 1>, 
memref<1xi8, 1>, memref<3xi8, 1>, memref<2xi8, 1> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module: !transform.any_op) { + transform.func.deduplicate_func_args @func_with_complex_duplicate_args at %module : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} From 1d27e663691f0dc8b42acd09cab8c6eb15489950 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 15 Sep 2025 05:41:31 -0700 Subject: [PATCH 327/734] [Github] Fix security issues in libcxx-run-benchmarks.yml workflow (#158467) There was one action dependency that was not hash pinned and this workflow also allowed code injection as the input might not be properly escaped when dumped into the run script. --- .github/workflows/libcxx-run-benchmarks.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libcxx-run-benchmarks.yml b/.github/workflows/libcxx-run-benchmarks.yml index 5714600b63a5e..17a97df029ba5 100644 --- a/.github/workflows/libcxx-run-benchmarks.yml +++ b/.github/workflows/libcxx-run-benchmarks.yml @@ -33,12 +33,14 @@ jobs: runs-on: llvm-premerge-libcxx-next-runners # TODO: This should run on a dedicated set of machines steps: - - uses: actions/setup-python@v6 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 with: python-version: '3.10' - name: Extract information from the PR id: vars + env: + COMMENT_BODY: ${{ github.event.comment.body }} run: | python3 -m venv .venv source .venv/bin/activate @@ -51,7 +53,7 @@ jobs: print(f"pr_base={pr.base.sha}") print(f"pr_head={pr.head.sha}") EOF - BENCHMARKS=$(echo "${{ github.event.comment.body }}" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') + BENCHMARKS=$(echo "$COMMENT_BODY" | sed -nE 's/\/libcxx-bot benchmark (.+)/\1/p') echo "benchmarks=${BENCHMARKS}" >> ${GITHUB_OUTPUT} - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 From 
dd8767b10176c307b58af5d7aff63f59292115bf Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Mon, 15 Sep 2025 20:43:41 +0800 Subject: [PATCH 328/734] [libc++][test] Clean-up `MinSequenceContainer` (#158432) Follows-up #158344. - Guard range-related functions with `TEST_STD_VER >= 23`. - Mark range-related functions unconditionally `constexpr`. - Add `TEST_CONSTEXPR_CXX20` to one `operator=`. This will make `MinSequenceContainer` more consistent and useful for legacy container adaptors or C++26 `constexpr` additions. Although we're currently only using it with flat container adaptors. --- libcxx/test/support/MinSequenceContainer.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/libcxx/test/support/MinSequenceContainer.h b/libcxx/test/support/MinSequenceContainer.h index 8433ebb39160c..f9e67cd726eb7 100644 --- a/libcxx/test/support/MinSequenceContainer.h +++ b/libcxx/test/support/MinSequenceContainer.h @@ -29,12 +29,13 @@ struct MinSequenceContainer { template explicit TEST_CONSTEXPR_CXX20 MinSequenceContainer(It first, It last) : data_(first, last) {} TEST_CONSTEXPR_CXX20 MinSequenceContainer(std::initializer_list il) : data_(il) {} +#if TEST_STD_VER >= 23 template - TEST_CONSTEXPR_CXX20 MinSequenceContainer(std::from_range_t, Range&& rg) - : data_(std::from_range, std::forward(rg)) {} + constexpr MinSequenceContainer(std::from_range_t, Range&& rg) : data_(std::from_range, std::forward(rg)) {} +#endif TEST_CONSTEXPR_CXX20 MinSequenceContainer(size_type n, T value) : data_(n, value) {} - MinSequenceContainer& operator=(std::initializer_list il) { data_ = il; } + TEST_CONSTEXPR_CXX20 MinSequenceContainer& operator=(std::initializer_list il) { data_ = il; } template TEST_CONSTEXPR_CXX20 void assign(It first, It last) { @@ -42,10 +43,12 @@ struct MinSequenceContainer { } TEST_CONSTEXPR_CXX20 void assign(std::initializer_list il) { data_.assign(il); } TEST_CONSTEXPR_CXX20 void assign(size_type n, value_type t) { data_.assign(n, t); } +#if 
TEST_STD_VER >= 23 template - TEST_CONSTEXPR_CXX20 void assign_range(Range&& rg) { + constexpr void assign_range(Range&& rg) { data_.assign_range(std::forward(rg)); } +#endif TEST_CONSTEXPR_CXX20 iterator begin() { return iterator(data_.data()); } TEST_CONSTEXPR_CXX20 const_iterator begin() const { return const_iterator(data_.data()); } TEST_CONSTEXPR_CXX20 const_iterator cbegin() const { return const_iterator(data_.data()); } @@ -73,10 +76,12 @@ struct MinSequenceContainer { return from_vector_iterator(data_.insert(to_vector_iterator(p), il)); } +#if TEST_STD_VER >= 23 template - TEST_CONSTEXPR_CXX20 iterator insert_range(const_iterator p, Range&& rg) { + constexpr iterator insert_range(const_iterator p, Range&& rg) { return from_vector_iterator(data_.insert_range(to_vector_iterator(p), std::forward(rg))); } +#endif TEST_CONSTEXPR_CXX20 iterator erase(const_iterator first, const_iterator last) { return from_vector_iterator(data_.erase(to_vector_iterator(first), to_vector_iterator(last))); From 0058cc81f14bc76d1c81ba3bd20d9e341e047277 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 15 Sep 2025 08:44:51 -0400 Subject: [PATCH 329/734] [libc++] Add documentation for the new comment-triggered benchmarking bot (#158167) The new benchmarking bot is experimental and is very rough on the edges. --- libcxx/docs/TestingLibcxx.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst index 44463385b81a7..227791031bab0 100644 --- a/libcxx/docs/TestingLibcxx.rst +++ b/libcxx/docs/TestingLibcxx.rst @@ -531,6 +531,16 @@ Finally, use ``compare-benchmarks`` to compare both: The ``compare-benchmarks`` script provides some useful options like creating a chart to easily visualize differences in a browser window. Use ``compare-benchmarks --help`` for details. 
+Additionally, adding a comment of the following form to a libc++ PR will cause the specified benchmarks to be run +on our pre-commit CI infrastructure and the results to be reported in the PR by our CI system: + +.. code-block:: + + /libcxx-bot benchmark ... + +Note that this is currently experimental and the results should not be relied upon too strongly, since +we do not have dedicated hardware to run the benchmarks on. + .. _`Google Benchmark`: https://github.com/google/benchmark .. _testing-hardening-assertions: From f0cf167a5eb36de2657669c68190517624a2cee9 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 15 Sep 2025 13:46:09 +0100 Subject: [PATCH 330/734] [libc++][CI] Update meson used for building picolibc (#158308) 1.9.0 is the latest release. This doesn't fix any current problem, I just want to make sure we update these things once in a while in case upstream picolibc adopts new versions. --- libcxx/utils/ci/build-picolibc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/ci/build-picolibc.sh b/libcxx/utils/ci/build-picolibc.sh index 4be768d741230..a25a588cb8d2e 100755 --- a/libcxx/utils/ci/build-picolibc.sh +++ b/libcxx/utils/ci/build-picolibc.sh @@ -95,7 +95,7 @@ EOF venv_dir="${build_dir}/meson-venv" python3 -m venv "${venv_dir}" # Install the version of meson that was the latest at the time this script was written. -"${venv_dir}/bin/pip" install "meson==1.1.1" +"${venv_dir}/bin/pip" install "meson==1.9.0" "${venv_dir}/bin/meson" setup \ -Dincludedir=include -Dlibdir=lib -Dspecsdir=none -Dmultilib=false -Dpicoexit=false \ From a41660aaf40b0f3b119300a8d980be52c2c4feed Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 15 Sep 2025 08:47:40 -0400 Subject: [PATCH 331/734] [libc++] Don't run multithreaded stringstream benchmarks (#158294) This results in invalid JSON output as reported in google/benchmark#2039. 
--- libcxx/test/benchmarks/stringstream.bench.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/test/benchmarks/stringstream.bench.cpp b/libcxx/test/benchmarks/stringstream.bench.cpp index b7c50a96ef51e..367024c49ac69 100644 --- a/libcxx/test/benchmarks/stringstream.bench.cpp +++ b/libcxx/test/benchmarks/stringstream.bench.cpp @@ -80,7 +80,7 @@ static void BM_Istream_numbers(benchmark::State& state) { while (state.KeepRunning()) benchmark::DoNotOptimize(i += istream_numbers(sel.imbue)); } -BENCHMARK(BM_Istream_numbers)->DenseRange(0, 3)->UseRealTime()->Threads(1)->ThreadPerCpu(); +BENCHMARK(BM_Istream_numbers)->DenseRange(0, 3); static void BM_Ostream_number(benchmark::State& state) { LocaleSelector sel(state); @@ -92,6 +92,6 @@ static void BM_Ostream_number(benchmark::State& state) { benchmark::DoNotOptimize(ss.str().c_str()); } } -BENCHMARK(BM_Ostream_number)->DenseRange(0, 3)->UseRealTime()->Threads(1)->ThreadPerCpu(); +BENCHMARK(BM_Ostream_number)->DenseRange(0, 3); BENCHMARK_MAIN(); From d0263f07d1ad35223b017e660725c0a093e89e74 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 15 Sep 2025 08:48:13 -0400 Subject: [PATCH 332/734] [libc++] Remove complexity calculations from benchmark (#158290) Our benchmarks are not really suited for complexity calculation, since that doesn't translate nicely to any of the performance tracking tools we have (including Lit). 
--- libcxx/test/benchmarks/filesystem.bench.cpp | 31 ++++++--------------- libcxx/utils/parse-google-benchmark-results | 2 ++ 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/libcxx/test/benchmarks/filesystem.bench.cpp b/libcxx/test/benchmarks/filesystem.bench.cpp index c058a5d41a150..61d14a453e72f 100644 --- a/libcxx/test/benchmarks/filesystem.bench.cpp +++ b/libcxx/test/benchmarks/filesystem.bench.cpp @@ -30,9 +30,8 @@ void BM_PathConstructString(benchmark::State& st, GenInputs gen) { const path P(PP.native()); benchmark::DoNotOptimize(P.native().data()); } - st.SetComplexityN(st.range(0)); } -BENCHMARK_CAPTURE(BM_PathConstructString, large_string, getRandomStringInputs)->Range(8, TestNumInputs)->Complexity(); +BENCHMARK_CAPTURE(BM_PathConstructString, large_string, getRandomStringInputs)->Range(8, TestNumInputs); template void BM_PathConstructCStr(benchmark::State& st, GenInputs gen) { @@ -66,7 +65,6 @@ void BM_PathConstructIter(benchmark::State& st, GenInputs gen) { const path P(Start, End); benchmark::DoNotOptimize(P.native().data()); } - st.SetComplexityN(st.range(0)); } template void BM_PathConstructInputIter(benchmark::State& st, GenInputs gen) { @@ -77,11 +75,9 @@ void BM_PathConstructForwardIter(benchmark::State& st, GenInputs gen) { BM_PathConstructIter(st, gen); } BENCHMARK_CAPTURE(BM_PathConstructInputIter, large_string, getRandomStringInputs) - ->Range(8, TestNumInputs) - ->Complexity(); + ->Range(8, TestNumInputs); BENCHMARK_CAPTURE(BM_PathConstructForwardIter, large_string, getRandomStringInputs) - ->Range(8, TestNumInputs) - ->Complexity(); + ->Range(8, TestNumInputs); template void BM_PathIterateMultipleTimes(benchmark::State& st, GenInputs gen) { @@ -97,11 +93,9 @@ void BM_PathIterateMultipleTimes(benchmark::State& st, GenInputs gen) { } benchmark::ClobberMemory(); } - st.SetComplexityN(st.range(0)); } BENCHMARK_CAPTURE(BM_PathIterateMultipleTimes, iterate_elements, getRandomStringInputs) - ->Range(8, TestNumInputs) - 
->Complexity(); + ->Range(8, TestNumInputs); template void BM_PathIterateOnce(benchmark::State& st, GenInputs gen) { @@ -118,9 +112,8 @@ void BM_PathIterateOnce(benchmark::State& st, GenInputs gen) { } benchmark::ClobberMemory(); } - st.SetComplexityN(st.range(0)); } -BENCHMARK_CAPTURE(BM_PathIterateOnce, iterate_elements, getRandomStringInputs)->Range(8, TestNumInputs)->Complexity(); +BENCHMARK_CAPTURE(BM_PathIterateOnce, iterate_elements, getRandomStringInputs)->Range(8, TestNumInputs); template void BM_PathIterateOnceBackwards(benchmark::State& st, GenInputs gen) { @@ -160,16 +153,13 @@ void BM_LexicallyNormal(benchmark::State& st, GenInput gen, size_t PathLen) { while (st.KeepRunning()) { benchmark::DoNotOptimize(In.lexically_normal()); } - st.SetComplexityN(st.range(0)); } BENCHMARK_CAPTURE(BM_LexicallyNormal, small_path, getRandomPaths, /*PathLen*/ 5) ->RangeMultiplier(2) - ->Range(2, 256) - ->Complexity(); + ->Range(2, 256); BENCHMARK_CAPTURE(BM_LexicallyNormal, large_path, getRandomPaths, /*PathLen*/ 32) ->RangeMultiplier(2) - ->Range(2, 256) - ->Complexity(); + ->Range(2, 256); template void BM_LexicallyRelative(benchmark::State& st, GenInput gen, size_t PathLen) { @@ -180,15 +170,12 @@ void BM_LexicallyRelative(benchmark::State& st, GenInput gen, size_t PathLen) { for (auto _ : st) { benchmark::DoNotOptimize(TargetPath.lexically_relative(BasePath)); } - st.SetComplexityN(st.range(0)); } BENCHMARK_CAPTURE(BM_LexicallyRelative, small_path, getRandomPaths, /*PathLen*/ 5) ->RangeMultiplier(2) - ->Range(2, 256) - ->Complexity(); + ->Range(2, 256); BENCHMARK_CAPTURE(BM_LexicallyRelative, large_path, getRandomPaths, /*PathLen*/ 32) ->RangeMultiplier(2) - ->Range(2, 256) - ->Complexity(); + ->Range(2, 256); BENCHMARK_MAIN(); diff --git a/libcxx/utils/parse-google-benchmark-results b/libcxx/utils/parse-google-benchmark-results index 280c8045db6c9..86d59bb522a4d 100755 --- a/libcxx/utils/parse-google-benchmark-results +++ 
b/libcxx/utils/parse-google-benchmark-results @@ -26,6 +26,8 @@ def main(argv): for file in args.filename: js = json.load(file) for bm in js['benchmarks']: + if args.timing not in bm: + raise RuntimeError(f'Benchmark does not contain key for {args.timing}: {bm}') row = [bm['name'], bm[args.timing]] rows.append(row) From e0a33cb599f8614b3d897ad2bd2f9fa8e1acbac5 Mon Sep 17 00:00:00 2001 From: camc <69519329+camc@users.noreply.github.com> Date: Mon, 15 Sep 2025 14:04:45 +0100 Subject: [PATCH 333/734] [clang] Allow attributes on first constructor argument in pre-C++11 (#157300) Resolves GH-156809 Modifies decl parser to allow C++11 style [[attributes]] on the first argument in constructors in all C++ standards. They are already allowed on later arguments. --------- Co-authored-by: Shafik Yaghmour --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Parse/ParseDecl.cpp | 7 +++---- clang/test/Parser/cxx03-attributes.cpp | 6 ++++++ 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 clang/test/Parser/cxx03-attributes.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index bdf8334f78cea..dbba8f5db0cef 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -365,6 +365,8 @@ Bug Fixes to C++ Support authentication enabled. (#GH152601) - Fix the check for narrowing int-to-float conversions, so that they are detected in cases where converting the float back to an integer is undefined behaviour (#GH157067). +- Stop rejecting C++11-style attributes on the first argument of constructors in older + standards. (#GH156809). - Fix a crash when applying binary or ternary operators to two same function types with different spellings, where at least one of the function parameters has an attribute which affects the function type. 
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 10355bb874762..bbeee2e3e373f 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -6007,10 +6007,9 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified, bool DeductionGuide, // A C++11 attribute here signals that we have a constructor, and is an // attribute on the first constructor parameter. - if (getLangOpts().CPlusPlus11 && - isCXX11AttributeSpecifier(/*Disambiguate*/ false, - /*OuterMightBeMessageSend*/ true) != - CXX11AttributeKind::NotAttributeSpecifier) { + if (isCXX11AttributeSpecifier(/*Disambiguate=*/false, + /*OuterMightBeMessageSend=*/true) != + CXX11AttributeKind::NotAttributeSpecifier) { return true; } diff --git a/clang/test/Parser/cxx03-attributes.cpp b/clang/test/Parser/cxx03-attributes.cpp new file mode 100644 index 0000000000000..d3afef76366a3 --- /dev/null +++ b/clang/test/Parser/cxx03-attributes.cpp @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++03 %s +// expected-no-diagnostics + +struct S { + S([[clang::lifetimebound]] int&) {} +}; From 895cda70a95529fd22aac05eee7c34f7624996af Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Mon, 15 Sep 2025 08:18:32 -0500 Subject: [PATCH 334/734] Introduce -fexperimental-loop-fusion to clang and flang (#142686) This patch adds the flag -fexperimental-loop-fusion to the clang and flang drivers. This is primarily useful for experimentation, as we envision enabling the pass by default one day. The options follow the same principles and reasoning as `-floop-interchange`.
--------- Co-authored-by: Madhur Amilkanthwar --- clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 4 ++++ clang/lib/CodeGen/BackendUtil.cpp | 2 ++ clang/lib/Driver/ToolChains/Clang.cpp | 2 ++ clang/lib/Driver/ToolChains/Flang.cpp | 3 +++ clang/lib/Frontend/CompilerInvocation.cpp | 5 +++++ clang/test/Driver/clang_f_opts.c | 9 +++++++++ flang/docs/ReleaseNotes.md | 2 ++ flang/include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 3 +++ flang/lib/Frontend/FrontendActions.cpp | 1 + flang/test/Driver/loop-fuse.f90 | 17 +++++++++++++++++ llvm/include/llvm/Passes/PassBuilder.h | 3 +++ llvm/lib/Passes/PassBuilderPipelines.cpp | 8 +++++++- llvm/tools/opt/NewPMDriver.cpp | 4 ++++ 15 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 flang/test/Driver/loop-fuse.f90 diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index fda0da99b60c0..872f73ebf3810 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -322,6 +322,7 @@ CODEGENOPT(TimeTrace , 1, 0, Benign) ///< Set when -ftime-trace is enabl VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500, Benign) ///< Minimum time granularity (in microseconds), ///< traced by time profiler CODEGENOPT(InterchangeLoops , 1, 0, Benign) ///< Run loop-interchange. +CODEGENOPT(FuseLoops , 1, 0, Benign) ///< Run loop-fusion. CODEGENOPT(UnrollLoops , 1, 0, Benign) ///< Control whether loops are unrolled. CODEGENOPT(RerollLoops , 1, 0, Benign) ///< Control whether loops are rerolled. CODEGENOPT(NoUseJumpTables , 1, 0, Benign) ///< Set when -fno-jump-tables is enabled. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7c514e809aa9..47d328f862e07 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4304,6 +4304,10 @@ def floop_interchange : Flag<["-"], "floop-interchange">, Group, HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group, HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; +defm experimental_loop_fusion + : OptInCC1FFlag<"experimental-loop-fusion", "Enable", "Disable", + "Enable the loop fusion pass", + [ClangOption, FlangOption, FC1Option]>; def funroll_loops : Flag<["-"], "funroll-loops">, Group, HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 3f095c03397fd..8c99af2bdff83 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -896,6 +896,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PipelineTuningOptions PTO; PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; PTO.LoopInterchange = CodeGenOpts.InterchangeLoops; + PTO.LoopFusion = CodeGenOpts.FuseLoops; // For historical reasons, loop interleaving is set to mirror setting for loop // unrolling. PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; @@ -1331,6 +1332,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex, Conf.SampleProfile = std::move(SampleProfile); Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops; Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops; + Conf.PTO.LoopFusion = CGOpts.FuseLoops; // For historical reasons, loop interleaving is set to mirror setting for loop // unrolling. 
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 946b1e39af3b9..63efb0f02baa8 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6854,6 +6854,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_unroll_loops); Args.AddLastArg(CmdArgs, options::OPT_floop_interchange, options::OPT_fno_loop_interchange); + Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion, + options::OPT_fno_experimental_loop_fusion); Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1535f4cebf436..d3f4af164f672 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -151,6 +151,9 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion, + options::OPT_fno_experimental_loop_fusion); + handleInterchangeLoopsArgs(Args, CmdArgs); handleVectorizeLoopsArgs(Args, CmdArgs); handleVectorizeSLPArgs(Args, CmdArgs); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 761310813f787..422375240bab6 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1680,6 +1680,9 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, else GenerateArg(Consumer, OPT_fno_loop_interchange); + if (Opts.FuseLoops) + GenerateArg(Consumer, OPT_fexperimental_loop_fusion); + if (!Opts.BinutilsVersion.empty()) GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion); @@ -2001,6 +2004,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, (Opts.OptimizationLevel > 1)); 
Opts.InterchangeLoops = Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false); + Opts.FuseLoops = Args.hasFlag(OPT_fexperimental_loop_fusion, + OPT_fno_experimental_loop_fusion, false); Opts.BinutilsVersion = std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ)); diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index ee7ded265769b..eb3994ddabcd3 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -52,6 +52,15 @@ // CHECK-INTERCHANGE-LOOPS: "-floop-interchange" // CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange" +// RUN: %clang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s +// CHECK-FUSE-LOOPS: "-fexperimental-loop-fusion" +// +// RUN: %clang -c -fexperimental-loop-fusion -mllvm -print-pipeline-passes -O3 %s 2>&1 | FileCheck --check-prefixes=LOOP-FUSION-ON %s +// RUN: %clang -c -mllvm -print-pipeline-passes -O3 %s 2>&1 | FileCheck --check-prefixes=LOOP-FUSION-OFF %s + +// LOOP-FUSION-ON: loop-fusion +// LOOP-FUSION-OFF-NOT: loop-fusion + // RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s // CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate" diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index c9623ea08c4e6..6a285f829053b 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -35,6 +35,8 @@ page](https://llvm.org/releases/). ## New Compiler Flags +* -fexperimental-loop-fusion is now recognized by flang. 
+ ## Windows Support ## Fortran Language Changes in Flang diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index cdeea93c9aecb..edab48a70d29d 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -43,6 +43,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization. CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange. +CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fusion. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 6295a58b1bdad..4f42fbd66eac0 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -276,6 +276,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, if (args.getLastArg(clang::driver::options::OPT_floop_interchange)) opts.InterchangeLoops = 1; + if (args.getLastArg(clang::driver::options::OPT_fexperimental_loop_fusion)) + opts.FuseLoops = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) opts.VectorizeLoop = 1; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 3bef6b1c31825..23cc1e63e773d 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -958,6 +958,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { si.getTimePasses().setOutStream(ci.getTimingStreamLLVM()); pto.LoopUnrolling = opts.UnrollLoops; pto.LoopInterchange = opts.InterchangeLoops; + pto.LoopFusion = opts.FuseLoops; pto.LoopInterleaving = opts.UnrollLoops; 
pto.LoopVectorization = opts.VectorizeLoop; pto.SLPVectorization = opts.VectorizeSLP; diff --git a/flang/test/Driver/loop-fuse.f90 b/flang/test/Driver/loop-fuse.f90 new file mode 100644 index 0000000000000..ddfd9065e0fd4 --- /dev/null +++ b/flang/test/Driver/loop-fuse.f90 @@ -0,0 +1,17 @@ +! RUN: %flang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE %s +! RUN: %flang -### -S -fno-experimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! CHECK-LOOP-FUSE: "-fexperimental-loop-fusion" +! CHECK-NO-LOOP-FUSE-NOT: "-fexperimental-loop-fusion" +! RUN: %flang_fc1 -emit-llvm -O2 -fexperimental-loop-fusion -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE-PASS %s +! RUN: %flang_fc1 -emit-llvm -O2 -fno-experimental-loop-fusion -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE-PASS %s +! CHECK-LOOP-FUSE-PASS: loop-fusion +! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion + +program test +end program diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 9cdb7ca7dbc9b..2742ec1b71b7e 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -65,6 +65,9 @@ class PipelineTuningOptions { /// false. bool LoopInterchange; + /// Tuning option to enable/disable loop fusion. Its default value is false. 
+ bool LoopFusion; + /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value /// is that of the flag: `-forget-scev-loop-unroll`. bool ForgetAllSCEVInLoopUnroll; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 98821bb1408a7..79642e650ac83 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -104,6 +104,7 @@ #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopFlatten.h" +#include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopInterchange.h" @@ -1551,6 +1552,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); + // FIXME: This may not be the right place in the pipeline. + // We need to have the data to support the right place. + if (PTO.LoopFusion) + OptimizePM.addPass(LoopFusePass()); + // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. 
This is // currently only performed for loops marked with the metadata @@ -2355,4 +2361,4 @@ AAManager PassBuilder::buildDefaultAAPipeline() { bool PassBuilder::isInstrumentedPGOUse() const { return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) || !UseCtxProfile.empty(); -} \ No newline at end of file +} diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index b9b8929a0f703..0c991b71a6b26 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -60,6 +60,9 @@ cl::opt VerifyEachDebugInfoPreserve( cl::desc("Start each pass with collecting and end it with checking of " "debug info preservation.")); +static cl::opt EnableLoopFusion("enable-loopfusion", cl::init(false), + cl::Hidden, + cl::desc("Enable the LoopFuse Pass")); cl::opt VerifyDIPreserveExport("verify-di-preserve-export", cl::desc("Export debug info preservation failures into " @@ -446,6 +449,7 @@ bool llvm::runPassPipeline( // option has been enabled. PTO.LoopUnrolling = !DisableLoopUnrolling; PTO.UnifiedLTO = UnifiedLTO; + PTO.LoopFusion = EnableLoopFusion; PassBuilder PB(TM, PTO, P, &PIC); registerEPCallbacks(PB); From 4452fbddc5c410f50557ea9f1ef6e1e83d236b1e Mon Sep 17 00:00:00 2001 From: Shaoce SUN Date: Mon, 15 Sep 2025 21:27:46 +0800 Subject: [PATCH 335/734] [RISCV][GlobalIsel] Reduce constant pool usage without FP extension (#158346) The recognition range can be extended later. 
--- .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 28 +- .../Target/RISCV/GISel/RISCVLegalizerInfo.h | 1 + .../CodeGen/RISCV/GlobalISel/constantpool.ll | 144 ++++++-- .../CodeGen/RISCV/GlobalISel/double-arith.ll | 158 ++++----- .../CodeGen/RISCV/GlobalISel/float-arith.ll | 316 ++++++++---------- llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll | 8 +- 6 files changed, 354 insertions(+), 301 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 16f34a89a52ec..82a571587cc77 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -572,7 +572,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .legalFor(ST.hasStdExtF(), {s32}) .legalFor(ST.hasStdExtD(), {s64}) .legalFor(ST.hasStdExtZfh(), {s16}) - .lowerFor({s32, s64, s128}); + .customFor(!ST.is64Bit(), {s32}) + .customFor(ST.is64Bit(), {s32, s64}) + .lowerFor({s64, s128}); getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) .legalFor(ST.hasStdExtF(), {{sXLen, s32}}) @@ -869,6 +871,17 @@ bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm, return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost()); } +bool RISCVLegalizerInfo::shouldBeInFConstantPool(const APFloat &APF) const { + [[maybe_unused]] unsigned Size = APF.getSizeInBits(APF.getSemantics()); + assert((Size == 32 || Size == 64) && "Only support f32 and f64"); + + int64_t Imm = APF.bitcastToAPInt().getSExtValue(); + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI); + if (Seq.size() <= STI.getMaxBuildIntsCost()) + return false; + return true; +} + bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const { const LLT XLenTy(STI.getXLenVT()); @@ -1358,7 +1371,18 @@ bool RISCVLegalizerInfo::legalizeCustom( return false; case TargetOpcode::G_ABS: return Helper.lowerAbsToMaxNeg(MI); - // TODO: G_FCONSTANT + case 
TargetOpcode::G_FCONSTANT: { + const APFloat FVal = MI.getOperand(1).getFPImm()->getValueAPF(); + if (shouldBeInFConstantPool(FVal)) + return Helper.lowerFConstant(MI); + + // Convert G_FCONSTANT to G_CONSTANT. + Register DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildConstant(DstReg, FVal.bitcastToAPInt()); + + MI.eraseFromParent(); + return true; + } case TargetOpcode::G_CONSTANT: { const Function &F = MF.getFunction(); // TODO: if PSI and BFI are present, add " || diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index 4451866745194..bd6d1665849c8 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -39,6 +39,7 @@ class RISCVLegalizerInfo : public LegalizerInfo { private: bool shouldBeInConstantPool(const APInt &APImm, bool ShouldOptForSize) const; + bool shouldBeInFConstantPool(const APFloat &APImm) const; bool legalizeShlAshrLshr(MachineInstr &MI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/constantpool.ll b/llvm/test/CodeGen/RISCV/GlobalISel/constantpool.ll index 1eeeb60c2eb40..cee04492dc441 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/constantpool.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/constantpool.ll @@ -15,47 +15,37 @@ define void @constpool_f32(ptr %p) { ; RV32-SMALL-LABEL: constpool_f32: ; RV32-SMALL: # %bb.0: -; RV32-SMALL-NEXT: lui a1, %hi(.LCPI0_0) -; RV32-SMALL-NEXT: lw a1, %lo(.LCPI0_0)(a1) +; RV32-SMALL-NEXT: lui a1, 260096 ; RV32-SMALL-NEXT: sw a1, 0(a0) ; RV32-SMALL-NEXT: ret ; ; RV32-MEDIUM-LABEL: constpool_f32: ; RV32-MEDIUM: # %bb.0: -; RV32-MEDIUM-NEXT: .Lpcrel_hi0: -; RV32-MEDIUM-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) -; RV32-MEDIUM-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi0)(a1) +; RV32-MEDIUM-NEXT: lui a1, 260096 ; RV32-MEDIUM-NEXT: sw a1, 0(a0) ; RV32-MEDIUM-NEXT: ret ; ; RV32-PIC-LABEL: constpool_f32: ; RV32-PIC: # %bb.0: -; 
RV32-PIC-NEXT: .Lpcrel_hi0: -; RV32-PIC-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) -; RV32-PIC-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi0)(a1) +; RV32-PIC-NEXT: lui a1, 260096 ; RV32-PIC-NEXT: sw a1, 0(a0) ; RV32-PIC-NEXT: ret ; ; RV64-SMALL-LABEL: constpool_f32: ; RV64-SMALL: # %bb.0: -; RV64-SMALL-NEXT: lui a1, %hi(.LCPI0_0) -; RV64-SMALL-NEXT: lw a1, %lo(.LCPI0_0)(a1) +; RV64-SMALL-NEXT: lui a1, 260096 ; RV64-SMALL-NEXT: sw a1, 0(a0) ; RV64-SMALL-NEXT: ret ; ; RV64-MEDIUM-LABEL: constpool_f32: ; RV64-MEDIUM: # %bb.0: -; RV64-MEDIUM-NEXT: .Lpcrel_hi0: -; RV64-MEDIUM-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) -; RV64-MEDIUM-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi0)(a1) +; RV64-MEDIUM-NEXT: lui a1, 260096 ; RV64-MEDIUM-NEXT: sw a1, 0(a0) ; RV64-MEDIUM-NEXT: ret ; ; RV64-PIC-LABEL: constpool_f32: ; RV64-PIC: # %bb.0: -; RV64-PIC-NEXT: .Lpcrel_hi0: -; RV64-PIC-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) -; RV64-PIC-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi0)(a1) +; RV64-PIC-NEXT: lui a1, 260096 ; RV64-PIC-NEXT: sw a1, 0(a0) ; RV64-PIC-NEXT: ret store float 1.0, ptr %p @@ -75,9 +65,9 @@ define void @constpool_f64(ptr %p) { ; ; RV32-MEDIUM-LABEL: constpool_f64: ; RV32-MEDIUM: # %bb.0: -; RV32-MEDIUM-NEXT: .Lpcrel_hi1: +; RV32-MEDIUM-NEXT: .Lpcrel_hi0: ; RV32-MEDIUM-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) -; RV32-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi1) +; RV32-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi0) ; RV32-MEDIUM-NEXT: lw a2, 0(a1) ; RV32-MEDIUM-NEXT: lw a1, 4(a1) ; RV32-MEDIUM-NEXT: sw a2, 0(a0) @@ -86,9 +76,9 @@ define void @constpool_f64(ptr %p) { ; ; RV32-PIC-LABEL: constpool_f64: ; RV32-PIC: # %bb.0: -; RV32-PIC-NEXT: .Lpcrel_hi1: +; RV32-PIC-NEXT: .Lpcrel_hi0: ; RV32-PIC-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) -; RV32-PIC-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi1) +; RV32-PIC-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi0) ; RV32-PIC-NEXT: lw a2, 0(a1) ; RV32-PIC-NEXT: lw a1, 4(a1) ; RV32-PIC-NEXT: sw a2, 0(a0) @@ -97,26 +87,124 @@ define void @constpool_f64(ptr %p) { ; ; RV64-SMALL-LABEL: constpool_f64: 
; RV64-SMALL: # %bb.0: -; RV64-SMALL-NEXT: lui a1, %hi(.LCPI1_0) -; RV64-SMALL-NEXT: ld a1, %lo(.LCPI1_0)(a1) +; RV64-SMALL-NEXT: li a1, 1023 +; RV64-SMALL-NEXT: slli a1, a1, 52 ; RV64-SMALL-NEXT: sd a1, 0(a0) ; RV64-SMALL-NEXT: ret ; ; RV64-MEDIUM-LABEL: constpool_f64: ; RV64-MEDIUM: # %bb.0: -; RV64-MEDIUM-NEXT: .Lpcrel_hi1: -; RV64-MEDIUM-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) -; RV64-MEDIUM-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi1)(a1) +; RV64-MEDIUM-NEXT: li a1, 1023 +; RV64-MEDIUM-NEXT: slli a1, a1, 52 ; RV64-MEDIUM-NEXT: sd a1, 0(a0) ; RV64-MEDIUM-NEXT: ret ; ; RV64-PIC-LABEL: constpool_f64: ; RV64-PIC: # %bb.0: -; RV64-PIC-NEXT: .Lpcrel_hi1: -; RV64-PIC-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) -; RV64-PIC-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi1)(a1) +; RV64-PIC-NEXT: li a1, 1023 +; RV64-PIC-NEXT: slli a1, a1, 52 ; RV64-PIC-NEXT: sd a1, 0(a0) ; RV64-PIC-NEXT: ret store double 1.0, ptr %p ret void } + +define void @constpool_f32_1234_5(ptr %p) { +; RV32-SMALL-LABEL: constpool_f32_1234_5: +; RV32-SMALL: # %bb.0: +; RV32-SMALL-NEXT: lui a1, 280997 +; RV32-SMALL-NEXT: sw a1, 0(a0) +; RV32-SMALL-NEXT: ret +; +; RV32-MEDIUM-LABEL: constpool_f32_1234_5: +; RV32-MEDIUM: # %bb.0: +; RV32-MEDIUM-NEXT: lui a1, 280997 +; RV32-MEDIUM-NEXT: sw a1, 0(a0) +; RV32-MEDIUM-NEXT: ret +; +; RV32-PIC-LABEL: constpool_f32_1234_5: +; RV32-PIC: # %bb.0: +; RV32-PIC-NEXT: lui a1, 280997 +; RV32-PIC-NEXT: sw a1, 0(a0) +; RV32-PIC-NEXT: ret +; +; RV64-SMALL-LABEL: constpool_f32_1234_5: +; RV64-SMALL: # %bb.0: +; RV64-SMALL-NEXT: lui a1, 280997 +; RV64-SMALL-NEXT: sw a1, 0(a0) +; RV64-SMALL-NEXT: ret +; +; RV64-MEDIUM-LABEL: constpool_f32_1234_5: +; RV64-MEDIUM: # %bb.0: +; RV64-MEDIUM-NEXT: lui a1, 280997 +; RV64-MEDIUM-NEXT: sw a1, 0(a0) +; RV64-MEDIUM-NEXT: ret +; +; RV64-PIC-LABEL: constpool_f32_1234_5: +; RV64-PIC: # %bb.0: +; RV64-PIC-NEXT: lui a1, 280997 +; RV64-PIC-NEXT: sw a1, 0(a0) +; RV64-PIC-NEXT: ret + store float 1.234500e+03, ptr %p + ret void +} + +define void @constpool_f64_1234_5(ptr %p) 
{ +; RV32-SMALL-LABEL: constpool_f64_1234_5: +; RV32-SMALL: # %bb.0: +; RV32-SMALL-NEXT: lui a1, %hi(.LCPI3_0) +; RV32-SMALL-NEXT: addi a1, a1, %lo(.LCPI3_0) +; RV32-SMALL-NEXT: lw a2, 0(a1) +; RV32-SMALL-NEXT: lw a1, 4(a1) +; RV32-SMALL-NEXT: sw a2, 0(a0) +; RV32-SMALL-NEXT: sw a1, 4(a0) +; RV32-SMALL-NEXT: ret +; +; RV32-MEDIUM-LABEL: constpool_f64_1234_5: +; RV32-MEDIUM: # %bb.0: +; RV32-MEDIUM-NEXT: .Lpcrel_hi1: +; RV32-MEDIUM-NEXT: auipc a1, %pcrel_hi(.LCPI3_0) +; RV32-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi1) +; RV32-MEDIUM-NEXT: lw a2, 0(a1) +; RV32-MEDIUM-NEXT: lw a1, 4(a1) +; RV32-MEDIUM-NEXT: sw a2, 0(a0) +; RV32-MEDIUM-NEXT: sw a1, 4(a0) +; RV32-MEDIUM-NEXT: ret +; +; RV32-PIC-LABEL: constpool_f64_1234_5: +; RV32-PIC: # %bb.0: +; RV32-PIC-NEXT: .Lpcrel_hi1: +; RV32-PIC-NEXT: auipc a1, %pcrel_hi(.LCPI3_0) +; RV32-PIC-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi1) +; RV32-PIC-NEXT: lw a2, 0(a1) +; RV32-PIC-NEXT: lw a1, 4(a1) +; RV32-PIC-NEXT: sw a2, 0(a0) +; RV32-PIC-NEXT: sw a1, 4(a0) +; RV32-PIC-NEXT: ret +; +; RV64-SMALL-LABEL: constpool_f64_1234_5: +; RV64-SMALL: # %bb.0: +; RV64-SMALL-NEXT: lui a1, 517 +; RV64-SMALL-NEXT: addi a1, a1, -1627 +; RV64-SMALL-NEXT: slli a1, a1, 41 +; RV64-SMALL-NEXT: sd a1, 0(a0) +; RV64-SMALL-NEXT: ret +; +; RV64-MEDIUM-LABEL: constpool_f64_1234_5: +; RV64-MEDIUM: # %bb.0: +; RV64-MEDIUM-NEXT: lui a1, 517 +; RV64-MEDIUM-NEXT: addi a1, a1, -1627 +; RV64-MEDIUM-NEXT: slli a1, a1, 41 +; RV64-MEDIUM-NEXT: sd a1, 0(a0) +; RV64-MEDIUM-NEXT: ret +; +; RV64-PIC-LABEL: constpool_f64_1234_5: +; RV64-PIC: # %bb.0: +; RV64-PIC-NEXT: lui a1, 517 +; RV64-PIC-NEXT: addi a1, a1, -1627 +; RV64-PIC-NEXT: slli a1, a1, 41 +; RV64-PIC-NEXT: sd a1, 0(a0) +; RV64-PIC-NEXT: ret + store double 1.234500e+03, ptr %p + ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll index 12684f30dbee0..4246aa545dd0e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll +++ 
b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll @@ -508,9 +508,8 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI14_0) -; RV64I-NEXT: ld a1, %lo(.LCPI14_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a1, a1, 63 @@ -599,35 +598,31 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind { ; ; RV64I-LABEL: fnmadd_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI15_0) -; RV64I-NEXT: ld s1, %lo(.LCPI15_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call fma -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: 
addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd double 0.0, %a %c_ = fadd double 0.0, %c @@ -708,35 +703,31 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { ; ; RV64I-LABEL: fnmadd_d_2: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI16_0) -; RV64I-NEXT: ld s1, %lo(.LCPI16_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call fma -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; 
RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %b_ = fadd double 0.0, %b %c_ = fadd double 0.0, %c @@ -869,9 +860,8 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI19_0) -; RV64I-NEXT: ld a1, %lo(.LCPI19_0)(a1) ; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a1, a1, 63 @@ -948,9 +938,8 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI20_0) -; RV64I-NEXT: ld a1, %lo(.LCPI20_0)(a1) ; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a1, a1, 63 @@ -1078,9 +1067,8 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI22_0) -; RV64I-NEXT: ld a1, %lo(.LCPI22_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 @@ -1186,28 +1174,25 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; ; RV64I-LABEL: fnmadd_d_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; 
RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI23_0) -; RV64I-NEXT: ld s1, %lo(.LCPI23_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __muldf3 ; RV64I-NEXT: li a1, -1 @@ -1215,12 +1200,11 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __subdf3 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd double 0.0, %a ; avoid negation using xor %b_ = fadd double 0.0, %b ; avoid negation using xor @@ -1302,34 +1286,30 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; ; RV64I-LABEL: fnmsub_d_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded 
Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI24_0) -; RV64I-NEXT: ld s1, %lo(.LCPI24_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __muldf3 ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __subdf3 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd double 0.0, %a ; avoid negation using xor %b_ = fadd double 0.0, %b ; avoid negation using xor diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll index 739f225ad1525..3222849641baf 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll @@ -472,9 +472,8 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, 
%hi(.LCPI14_0) -; RV32I-NEXT: lw a1, %lo(.LCPI14_0)(a0) ; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a2, a0, a2 @@ -495,9 +494,8 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI14_0) -; RV64I-NEXT: lw a1, %lo(.LCPI14_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: xor a2, a0, a2 @@ -526,66 +524,58 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind { ; ; RV32I-LABEL: fnmadd_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: lui a1, %hi(.LCPI15_0) -; RV32I-NEXT: lw s1, %lo(.LCPI15_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s3, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call fmaf -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; 
RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fnmadd_s: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI15_0) -; RV64I-NEXT: lw s1, %lo(.LCPI15_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call fmaf -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; 
RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd float 0.0, %a %c_ = fadd float 0.0, %c @@ -606,66 +596,58 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { ; ; RV32I-LABEL: fnmadd_s_2: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: lui a1, %hi(.LCPI16_0) -; RV32I-NEXT: lw s1, %lo(.LCPI16_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s3, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call fmaf -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fnmadd_s_2: ; RV64I: # %bb.0: -; 
RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI16_0) -; RV64I-NEXT: lw s1, %lo(.LCPI16_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call fmaf -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %b_ = fadd float 0.0, %b %c_ = fadd float 0.0, %c @@ -778,9 +760,8 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: lui a1, %hi(.LCPI19_0) -; RV32I-NEXT: lw a1, %lo(.LCPI19_0)(a1) ; 
RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 @@ -800,9 +781,8 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI19_0) -; RV64I-NEXT: lw a1, %lo(.LCPI19_0)(a1) ; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 @@ -836,9 +816,8 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: lui a1, %hi(.LCPI20_0) -; RV32I-NEXT: lw a1, %lo(.LCPI20_0)(a1) ; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a1, a0, a1 @@ -859,9 +838,8 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI20_0) -; RV64I-NEXT: lw a1, %lo(.LCPI20_0)(a1) ; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a1, a0, a1 @@ -935,9 +913,8 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, %hi(.LCPI22_0) -; RV32I-NEXT: lw a1, %lo(.LCPI22_0)(a0) ; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 @@ -961,9 +938,8 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI22_0) -; RV64I-NEXT: lw a1, 
%lo(.LCPI22_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 @@ -997,78 +973,70 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { ; ; RV32I-LABEL: fnmadd_s_contract: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: lui a1, %hi(.LCPI23_0) -; RV32I-NEXT: lw s1, %lo(.LCPI23_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __mulsf3 ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __subsf3 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 
4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fnmadd_s_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI23_0) -; RV64I-NEXT: lw s1, %lo(.LCPI23_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __mulsf3 ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __subsf3 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; 
RV64I-NEXT: ret %a_ = fadd float 0.0, %a ; avoid negation using xor %b_ = fadd float 0.0, %b ; avoid negation using xor @@ -1090,66 +1058,58 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { ; ; RV32I-LABEL: fnmsub_s_contract: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: lui a1, %hi(.LCPI24_0) -; RV32I-NEXT: lw s1, %lo(.LCPI24_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __subsf3 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fnmsub_s_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, 
sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI24_0) -; RV64I-NEXT: lw s1, %lo(.LCPI24_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3 ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __subsf3 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd float 0.0, %a ; avoid negation using xor %b_ = fadd float 0.0, %b ; avoid negation using xor diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll index d9ddf655c283a..bb96ba7e5b1fb 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll @@ -437,8 +437,8 @@ 
define void @va1_caller() nounwind { ; LP64: # %bb.0: ; LP64-NEXT: addi sp, sp, -16 ; LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; LP64-NEXT: lui a0, %hi(.LCPI3_0) -; LP64-NEXT: ld a1, %lo(.LCPI3_0)(a0) +; LP64-NEXT: li a1, 1023 +; LP64-NEXT: slli a1, a1, 52 ; LP64-NEXT: li a2, 2 ; LP64-NEXT: call va1 ; LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -494,8 +494,8 @@ define void @va1_caller() nounwind { ; RV64-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: addi s0, sp, 16 -; RV64-WITHFP-NEXT: lui a0, %hi(.LCPI3_0) -; RV64-WITHFP-NEXT: ld a1, %lo(.LCPI3_0)(a0) +; RV64-WITHFP-NEXT: li a1, 1023 +; RV64-WITHFP-NEXT: slli a1, a1, 52 ; RV64-WITHFP-NEXT: li a2, 2 ; RV64-WITHFP-NEXT: call va1 ; RV64-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload From dc00abac0e79359a16d617f1c36b5a628fa5b8a1 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 15 Sep 2025 13:33:07 +0000 Subject: [PATCH 336/734] [llvm] Add env prefix to environment variable This substitution is used in a few tests in certain build configurations (seems like only a 2-stage build on Darwin with Asan enabled on a previous stage). This needs an env prefix now that we have enabled the internal shell by default, or the tests end up failing. 
--- llvm/test/lit.cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index e8861e29be707..dd3f947b186b3 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -156,7 +156,7 @@ def get_asan_rtlib(): ld64_cmd = config.ld64_executable asan_rtlib = get_asan_rtlib() if asan_rtlib: - ld64_cmd = "DYLD_INSERT_LIBRARIES={} {}".format(asan_rtlib, ld64_cmd) + ld64_cmd = "env DYLD_INSERT_LIBRARIES={} {}".format(asan_rtlib, ld64_cmd) if config.osx_sysroot: ld64_cmd = "{} -syslibroot {}".format(ld64_cmd, config.osx_sysroot) From 228e24028d35af81172f1bf9b49abcd226e76aae Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 15 Sep 2025 15:39:42 +0200 Subject: [PATCH 337/734] Add llvm-ml64 to LLVM_TOOLCHAIN_TOOLS (#158582) So that it gets included in LLVM_INSTALL_TOOLCHAIN_ONLY builds, such as when building the Windows installer. Fixes #149664 --- llvm/cmake/modules/AddLLVM.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index c98e78da97b39..80e59a4df2433 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1442,6 +1442,7 @@ if(NOT LLVM_TOOLCHAIN_TOOLS) llvm-lib llvm-mca llvm-ml + llvm-ml64 llvm-nm llvm-objcopy llvm-objdump From 00c051080c3b03f2f6a346605c485bb828579b8d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 15 Sep 2025 15:42:53 +0200 Subject: [PATCH 338/734] [bazel] Add missing dependency for 471bd1745ef044a7ee58a4947bf06a7f56660502 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ffa4a2effbfd1..48725d2f32dbc 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4722,6 +4722,7 @@ cc_library( ":TransformDialect", ":TransformDialectInterfaces", 
":TransformUtils", + "//llvm:Support", ], ) From 64d5e6c4b35be1840bfffe57a24db2b9d18d65fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Mon, 15 Sep 2025 15:52:27 +0200 Subject: [PATCH 339/734] [NFC][clang] replace a C-array with std::array (#158047) Follow up to #157841, replacing the C-array with std::array so iterators can be used. --------- Co-authored-by: Nikolas Klauser --- clang/utils/TableGen/ClangAttrEmitter.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index a4e4de32ba53f..1342e1a6ffb5b 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -5169,7 +5169,7 @@ enum class SpellingKind : size_t { static const size_t NumSpellingKinds = (size_t)SpellingKind::NumSpellingKinds; class SpellingList { - std::vector Spellings[NumSpellingKinds]; + std::array, NumSpellingKinds> Spellings; public: ArrayRef operator[](SpellingKind K) const { @@ -5217,11 +5217,7 @@ class SpellingList { } bool hasSpelling() const { - for (size_t Kind = 0; Kind < NumSpellingKinds; ++Kind) { - if (Spellings[Kind].size() > 0) - return true; - } - return false; + return llvm::any_of(Spellings, [](const auto &L) { return !L.empty(); }); } }; From 92dcbf4092059b0c61865a3eac9520d7b5440951 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Mon, 15 Sep 2025 06:53:08 -0700 Subject: [PATCH 340/734] [NFC] Fix commas on assertion from 147514 (#158635) As brought up on the review for #147514, the original patch doesn't correctly parenthesize the expression in the assert. This fixes it. 
--- clang/lib/CodeGen/CGExprScalar.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index ce483c5cc4e45..4fa25c5d66669 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2142,9 +2142,9 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { bool Ignore = TestAndClearIgnoreResultAssign(); (void)Ignore; unsigned NumInitElements = E->getNumInits(); - assert(Ignore == false || - (NumInitElements == 0 && E->getType()->isVoidType()) && - "init list ignored"); + assert((Ignore == false || + (NumInitElements == 0 && E->getType()->isVoidType())) && + "init list ignored"); // HLSL initialization lists in the AST are an expansion which can contain // side-effecting expressions wrapped in opaque value expressions. To properly From 53a4e4a77bd70731320bfa82ddbe6f88ee30ea6f Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 15 Sep 2025 15:56:15 +0200 Subject: [PATCH 341/734] [libc++] Specialize __lazy_synth_three_way_comparator for std::greater and friends (#157624) This specializes `__lazy_synth_three_way_comparator` to forward to `__default_three_way_comparator` if the comparator desugars to `__greater_tag`. This is the same as the desugaring to `__less_tag` except that the sign has to be inverted. 
--- .../lazy_synth_three_way_comparator.h | 28 +++++++++++++++---- .../associative/map/map.ops/find.pass.cpp | 16 +++++++++++ .../multimap/multimap.ops/find.pass.cpp | 13 +++++++++ .../associative/multiset/find.pass.cpp | 15 ++++++++++ .../containers/associative/set/find.pass.cpp | 15 ++++++++++ 5 files changed, 81 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__utility/lazy_synth_three_way_comparator.h b/libcxx/include/__utility/lazy_synth_three_way_comparator.h index 9105d05e1ed6a..8c78742ccb4e3 100644 --- a/libcxx/include/__utility/lazy_synth_three_way_comparator.h +++ b/libcxx/include/__utility/lazy_synth_three_way_comparator.h @@ -70,12 +70,11 @@ struct __eager_compare_result { }; template -struct __lazy_synth_three_way_comparator< - _Comparator, - _LHS, - _RHS, - __enable_if_t<_And<__desugars_to<__less_tag, _Comparator, _LHS, _RHS>, - __has_default_three_way_comparator<_LHS, _RHS> >::value> > { +struct __lazy_synth_three_way_comparator<_Comparator, + _LHS, + _RHS, + __enable_if_t<_And<__desugars_to<__less_tag, _Comparator, _LHS, _RHS>, + __has_default_three_way_comparator<_LHS, _RHS> >::value> > { // This lifetimebound annotation is technically incorrect, but other specializations actually capture the lifetime of // the comparator. _LIBCPP_HIDE_FROM_ABI __lazy_synth_three_way_comparator(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator&) {} @@ -87,6 +86,23 @@ struct __lazy_synth_three_way_comparator< } }; +template +struct __lazy_synth_three_way_comparator<_Comparator, + _LHS, + _RHS, + __enable_if_t<_And<__desugars_to<__greater_tag, _Comparator, _LHS, _RHS>, + __has_default_three_way_comparator<_LHS, _RHS> >::value> > { + // This lifetimebound annotation is technically incorrect, but other specializations actually capture the lifetime of + // the comparator. + _LIBCPP_HIDE_FROM_ABI __lazy_synth_three_way_comparator(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator&) {} + + // Same comment as above. 
+ _LIBCPP_HIDE_FROM_ABI static __eager_compare_result + operator()(_LIBCPP_LIFETIMEBOUND const _LHS& __lhs, _LIBCPP_LIFETIMEBOUND const _RHS& __rhs) { + return __eager_compare_result(-__default_three_way_comparator<_LHS, _RHS>()(__lhs, __rhs)); + } +}; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___UTILITY_LAZY_SYNTH_THREE_WAY_COMPARATOR_H diff --git a/libcxx/test/std/containers/associative/map/map.ops/find.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/find.pass.cpp index 534d78128407d..63dbcda512803 100644 --- a/libcxx/test/std/containers/associative/map/map.ops/find.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.ops/find.pass.cpp @@ -72,6 +72,22 @@ int main(int, char**) { assert(r == std::next(m.begin(), 8)); } } + { // Check with std::greater to ensure we're actually using the correct comparator + using Pair = std::pair; + using Map = std::map >; + Pair ar[] = {Pair(5, 5), Pair(6, 6), Pair(7, 7), Pair(8, 8), Pair(9, 9), Pair(10, 10), Pair(11, 11), Pair(12, 12)}; + Map m(ar, ar + sizeof(ar) / sizeof(ar[0])); + assert(m.find(12) == std::next(m.begin(), 0)); + assert(m.find(11) == std::next(m.begin(), 1)); + assert(m.find(10) == std::next(m.begin(), 2)); + assert(m.find(9) == std::next(m.begin(), 3)); + assert(m.find(8) == std::next(m.begin(), 4)); + assert(m.find(7) == std::next(m.begin(), 5)); + assert(m.find(6) == std::next(m.begin(), 6)); + assert(m.find(5) == std::next(m.begin(), 7)); + assert(m.find(4) == std::next(m.begin(), 8)); + assert(std::next(m.begin(), 8) == m.end()); + } #if TEST_STD_VER >= 11 { typedef std::pair V; diff --git a/libcxx/test/std/containers/associative/multimap/multimap.ops/find.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.ops/find.pass.cpp index 15df6c15bfa78..7939e77da308d 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.ops/find.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.ops/find.pass.cpp @@ -69,6 +69,19 @@ int main(int, char**) 
{ assert(r == m.end()); } } + { + using Pair = std::pair; + using Map = std::multimap >; + Pair arr[] = { + Pair(5, 1), Pair(5, 2), Pair(5, 3), Pair(7, 1), Pair(7, 2), Pair(7, 3), Pair(9, 1), Pair(9, 2), Pair(9, 3)}; + const Map m(arr, arr + sizeof(arr) / sizeof(arr[0])); + assert(iter_in_range(std::next(m.begin(), 6), std::next(m.begin(), 9), m.find(5))); + assert(m.find(6) == m.end()); + assert(iter_in_range(std::next(m.begin(), 3), std::next(m.begin(), 6), m.find(7))); + assert(m.find(8) == m.end()); + assert(iter_in_range(std::next(m.begin(), 0), std::next(m.begin(), 3), m.find(9))); + assert(m.find(10) == m.end()); + } #if TEST_STD_VER >= 11 { typedef std::multimap, min_allocator>> M; diff --git a/libcxx/test/std/containers/associative/multiset/find.pass.cpp b/libcxx/test/std/containers/associative/multiset/find.pass.cpp index 62e6b9dae431d..866de0da5ea93 100644 --- a/libcxx/test/std/containers/associative/multiset/find.pass.cpp +++ b/libcxx/test/std/containers/associative/multiset/find.pass.cpp @@ -71,6 +71,21 @@ int main(int, char**) { assert(r == std::next(m.begin(), 8)); } } + { // Check with std::greater to ensure we're actually using the correct comparator + using Set = std::multiset >; + int ar[] = {5, 6, 7, 8, 9, 10, 11, 12}; + Set m(ar, ar + sizeof(ar) / sizeof(ar[0])); + assert(m.find(12) == std::next(m.begin(), 0)); + assert(m.find(11) == std::next(m.begin(), 1)); + assert(m.find(10) == std::next(m.begin(), 2)); + assert(m.find(9) == std::next(m.begin(), 3)); + assert(m.find(8) == std::next(m.begin(), 4)); + assert(m.find(7) == std::next(m.begin(), 5)); + assert(m.find(6) == std::next(m.begin(), 6)); + assert(m.find(5) == std::next(m.begin(), 7)); + assert(m.find(4) == std::next(m.begin(), 8)); + assert(std::next(m.begin(), 8) == m.end()); + } #if TEST_STD_VER >= 11 { typedef int V; diff --git a/libcxx/test/std/containers/associative/set/find.pass.cpp b/libcxx/test/std/containers/associative/set/find.pass.cpp index 88ceff0cb144f..deb193c17bfa9 
100644 --- a/libcxx/test/std/containers/associative/set/find.pass.cpp +++ b/libcxx/test/std/containers/associative/set/find.pass.cpp @@ -71,6 +71,21 @@ int main(int, char**) { assert(r == std::next(m.begin(), 8)); } } + { // Check with std::greater to ensure we're actually using the correct comparator + using Set = std::set >; + int ar[] = {5, 6, 7, 8, 9, 10, 11, 12}; + Set m(ar, ar + sizeof(ar) / sizeof(ar[0])); + assert(m.find(12) == std::next(m.begin(), 0)); + assert(m.find(11) == std::next(m.begin(), 1)); + assert(m.find(10) == std::next(m.begin(), 2)); + assert(m.find(9) == std::next(m.begin(), 3)); + assert(m.find(8) == std::next(m.begin(), 4)); + assert(m.find(7) == std::next(m.begin(), 5)); + assert(m.find(6) == std::next(m.begin(), 6)); + assert(m.find(5) == std::next(m.begin(), 7)); + assert(m.find(4) == std::next(m.begin(), 8)); + assert(std::next(m.begin(), 8) == m.end()); + } #if TEST_STD_VER >= 11 { typedef int V; From f1a02c681f0d092bb28ac3ff2e79eff11ecb95dc Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Mon, 15 Sep 2025 15:19:55 +0100 Subject: [PATCH 342/734] [lldb] Fix unordered-map data formatter for const types (#156033) The test was failing because the const qualifier is not removed when checking if the type is an `unordered_map` --- .../Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index f88a5319068a2..4b183a8d62e53 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -113,10 +113,11 @@ CompilerType lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: // wraps a std::pair. Peel away the internal wrapper type - whose structure is // of no value to users, to expose the std::pair. 
This matches the structure // returned by the std::map synthetic provider. - if (isUnorderedMap(m_backend.GetCompilerType() - .GetNonReferenceType() - .GetCanonicalType() - .GetTypeName())) { + CompilerType backend_type = m_backend.GetCompilerType(); + if (backend_type.IsPointerOrReferenceType()) + backend_type = backend_type.GetPointeeType(); + + if (isUnorderedMap(backend_type.GetCanonicalType().GetTypeName())) { std::string name; CompilerType field_type = element_type.GetFieldAtIndex(0, name, nullptr, nullptr, nullptr); From 5e118eca93ad7591c7b904a160d4d42cd37903c5 Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Mon, 15 Sep 2025 15:21:56 +0100 Subject: [PATCH 343/734] [lldb][test] Fix unordered-map test. (#158286) The build step is overidden so it uses `libstdc++` instead of `libc++` on linux --- .../unordered_map-iterator/TestDataFormatterStdUnorderedMap.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py index d2382373f4810..1e920faab6397 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py @@ -113,7 +113,6 @@ def do_test_ptr(self): Test that pointers to std::unordered_map are formatted correctly. 
""" - self.build() (self.target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( self, "Stop here", lldb.SBFileSpec("main.cpp", False) ) From c723cc2a041d6e7e741b0ce6abc1f18d4ada9b4a Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Mon, 15 Sep 2025 16:22:00 +0200 Subject: [PATCH 344/734] [Remarks] BitstreamRemarkParser: Refactor error handling (#156511) In preparation of larger changes to the bitstream remark format, refactor the error handling code in the BitstreamRemarkParser. Main change: move the various static helper methods into the parser helper classes, so we don't need to pass around as many args. Calling `error(...)` inside the helper classes now automatically prepends the current block being parsed to the error message. NFCI (except for error messages on invalid bitstream files). Pull Request: https://github.com/llvm/llvm-project/pull/156511 --- llvm/lib/Remarks/BitstreamRemarkParser.cpp | 502 ++++++++------------- llvm/lib/Remarks/BitstreamRemarkParser.h | 207 ++++++--- 2 files changed, 344 insertions(+), 365 deletions(-) diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/llvm/lib/Remarks/BitstreamRemarkParser.cpp index 86a6c6dffb187..d40b40dfb2ba0 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkParser.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "BitstreamRemarkParser.h" -#include "llvm/Remarks/Remark.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include @@ -20,27 +19,68 @@ using namespace llvm; using namespace llvm::remarks; -static Error unknownRecord(const char *BlockName, unsigned RecordID) { - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing %s: unknown record entry (%lu).", BlockName, - RecordID); +namespace { + +template Error error(char const *Fmt, const Ts &...Vals) { + std::string Buffer; + raw_string_ostream OS(Buffer); + OS 
<< formatv(Fmt, Vals...); + return make_error( + std::move(Buffer), + std::make_error_code(std::errc::illegal_byte_sequence)); +} + +} // namespace + +Error BitstreamBlockParserHelperBase::unknownRecord(unsigned AbbrevID) { + return error("Unknown record entry ({}).", AbbrevID); +} + +Error BitstreamBlockParserHelperBase::unexpectedRecord(StringRef RecordName) { + return error("Unexpected record entry ({}).", RecordName); +} + +Error BitstreamBlockParserHelperBase::malformedRecord(StringRef RecordName) { + return error("Malformed record entry ({}).", RecordName); +} + +Error BitstreamBlockParserHelperBase::unexpectedBlock(unsigned Code) { + return error("Unexpected subblock ({}).", Code); } -static Error malformedRecord(const char *BlockName, const char *RecordName) { - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing %s: malformed record entry (%s).", BlockName, - RecordName); +static Expected expectSubBlock(BitstreamCursor &Stream) { + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + switch (Next->Kind) { + case BitstreamEntry::SubBlock: + return Next->ID; + case BitstreamEntry::Record: + case BitstreamEntry::EndBlock: + return error("Expected subblock, but got unexpected record."); + case BitstreamEntry::Error: + return error("Expected subblock, but got unexpected end of bitstream."); + } + llvm_unreachable("Unexpected BitstreamEntry"); } -BitstreamMetaParserHelper::BitstreamMetaParserHelper( - BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo) - : Stream(Stream), BlockInfo(BlockInfo) {} +Error BitstreamBlockParserHelperBase::expectBlock() { + auto MaybeBlockID = expectSubBlock(Stream); + if (!MaybeBlockID) + return MaybeBlockID.takeError(); + if (*MaybeBlockID != BlockID) + return error("Expected {} block, but got unexpected block ({}).", BlockName, + *MaybeBlockID); + return Error::success(); +} -/// Parse a record and fill in the fields in the parser. 
-static Error parseRecord(BitstreamMetaParserHelper &Parser, unsigned Code) { - BitstreamCursor &Stream = Parser.Stream; +Error BitstreamBlockParserHelperBase::enterBlock() { + if (Stream.EnterSubBlock(BlockID)) + return error("Error while entering {} block.", BlockName); + return Error::success(); +} + +Error BitstreamMetaParserHelper::parseRecord(unsigned Code) { // Note: 2 is used here because it's the max number of fields we have per // record. SmallVector Record; @@ -52,171 +92,132 @@ static Error parseRecord(BitstreamMetaParserHelper &Parser, unsigned Code) { switch (*RecordID) { case RECORD_META_CONTAINER_INFO: { if (Record.size() != 2) - return malformedRecord("BLOCK_META", "RECORD_META_CONTAINER_INFO"); - Parser.ContainerVersion = Record[0]; - Parser.ContainerType = Record[1]; + return malformedRecord(MetaContainerInfoName); + Container = {Record[0], Record[1]}; + // Error immediately if container version is outdated, so the user sees an + // explanation instead of a parser error. + if (Container->Version != CurrentContainerVersion) { + return ::error( + "Unsupported remark container version (expected: {}, read: {}). " + "Please upgrade/downgrade your toolchain to read this container.", + CurrentContainerVersion, Container->Version); + } break; } case RECORD_META_REMARK_VERSION: { if (Record.size() != 1) - return malformedRecord("BLOCK_META", "RECORD_META_REMARK_VERSION"); - Parser.RemarkVersion = Record[0]; + return malformedRecord(MetaRemarkVersionName); + RemarkVersion = Record[0]; + // Error immediately if remark version is outdated, so the user sees an + // explanation instead of a parser error. + if (*RemarkVersion != CurrentRemarkVersion) { + return ::error( + "Unsupported remark version in container (expected: {}, read: {}). 
" + "Please upgrade/downgrade your toolchain to read this container.", + CurrentRemarkVersion, *RemarkVersion); + } break; } case RECORD_META_STRTAB: { if (Record.size() != 0) - return malformedRecord("BLOCK_META", "RECORD_META_STRTAB"); - Parser.StrTabBuf = Blob; + return malformedRecord(MetaStrTabName); + StrTabBuf = Blob; break; } case RECORD_META_EXTERNAL_FILE: { if (Record.size() != 0) - return malformedRecord("BLOCK_META", "RECORD_META_EXTERNAL_FILE"); - Parser.ExternalFilePath = Blob; + return malformedRecord(MetaExternalFileName); + ExternalFilePath = Blob; break; } default: - return unknownRecord("BLOCK_META", *RecordID); + return unknownRecord(*RecordID); } return Error::success(); } -BitstreamRemarkParserHelper::BitstreamRemarkParserHelper( - BitstreamCursor &Stream) - : Stream(Stream) {} - -/// Parse a record and fill in the fields in the parser. -static Error parseRecord(BitstreamRemarkParserHelper &Parser, unsigned Code) { - BitstreamCursor &Stream = Parser.Stream; - // Note: 5 is used here because it's the max number of fields we have per - // record. 
- SmallVector Record; - StringRef Blob; - Expected RecordID = Stream.readRecord(Code, Record, &Blob); - if (!RecordID) - return RecordID.takeError(); +Error BitstreamRemarkParserHelper::parseRecord(unsigned Code) { + Record.clear(); + Expected MaybeRecordID = + Stream.readRecord(Code, Record, &RecordBlob); + if (!MaybeRecordID) + return MaybeRecordID.takeError(); + RecordID = *MaybeRecordID; + return handleRecord(); +} - switch (*RecordID) { +Error BitstreamRemarkParserHelper::handleRecord() { + switch (RecordID) { case RECORD_REMARK_HEADER: { if (Record.size() != 4) - return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HEADER"); - Parser.Type = Record[0]; - Parser.RemarkNameIdx = Record[1]; - Parser.PassNameIdx = Record[2]; - Parser.FunctionNameIdx = Record[3]; + return malformedRecord(RemarkHeaderName); + Type = Record[0]; + RemarkNameIdx = Record[1]; + PassNameIdx = Record[2]; + FunctionNameIdx = Record[3]; break; } case RECORD_REMARK_DEBUG_LOC: { if (Record.size() != 3) - return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_DEBUG_LOC"); - Parser.SourceFileNameIdx = Record[0]; - Parser.SourceLine = Record[1]; - Parser.SourceColumn = Record[2]; + return malformedRecord(RemarkDebugLocName); + Loc = {Record[0], Record[1], Record[2]}; break; } case RECORD_REMARK_HOTNESS: { if (Record.size() != 1) - return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HOTNESS"); - Parser.Hotness = Record[0]; + return malformedRecord(RemarkHotnessName); + Hotness = Record[0]; break; } case RECORD_REMARK_ARG_WITH_DEBUGLOC: { if (Record.size() != 5) - return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_ARG_WITH_DEBUGLOC"); - // Create a temporary argument. Use that as a valid memory location for this - // argument entry. 
- Parser.TmpArgs.emplace_back(); - Parser.TmpArgs.back().KeyIdx = Record[0]; - Parser.TmpArgs.back().ValueIdx = Record[1]; - Parser.TmpArgs.back().SourceFileNameIdx = Record[2]; - Parser.TmpArgs.back().SourceLine = Record[3]; - Parser.TmpArgs.back().SourceColumn = Record[4]; - Parser.Args = - ArrayRef(Parser.TmpArgs); + return malformedRecord(RemarkArgWithDebugLocName); + auto &Arg = Args.emplace_back(Record[0], Record[1]); + Arg.Loc = {Record[2], Record[3], Record[4]}; break; } case RECORD_REMARK_ARG_WITHOUT_DEBUGLOC: { if (Record.size() != 2) - return malformedRecord("BLOCK_REMARK", - "RECORD_REMARK_ARG_WITHOUT_DEBUGLOC"); - // Create a temporary argument. Use that as a valid memory location for this - // argument entry. - Parser.TmpArgs.emplace_back(); - Parser.TmpArgs.back().KeyIdx = Record[0]; - Parser.TmpArgs.back().ValueIdx = Record[1]; - Parser.Args = - ArrayRef(Parser.TmpArgs); + return malformedRecord(RemarkArgWithoutDebugLocName); + Args.emplace_back(Record[0], Record[1]); break; } default: - return unknownRecord("BLOCK_REMARK", *RecordID); + return unknownRecord(RecordID); } return Error::success(); } -template -static Error parseBlock(T &ParserHelper, unsigned BlockID, - const char *BlockName) { - BitstreamCursor &Stream = ParserHelper.Stream; - Expected Next = Stream.advance(); - if (!Next) - return Next.takeError(); - if (Next->Kind != BitstreamEntry::SubBlock || Next->ID != BlockID) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing %s: expecting [ENTER_SUBBLOCK, %s, ...].", - BlockName, BlockName); - if (Stream.EnterSubBlock(BlockID)) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while entering %s.", BlockName); - - // Stop when there is nothing to read anymore or when we encounter an - // END_BLOCK. 
- while (!Stream.AtEndOfStream()) { - Next = Stream.advance(); - if (!Next) - return Next.takeError(); - switch (Next->Kind) { - case BitstreamEntry::EndBlock: - return Error::success(); - case BitstreamEntry::Error: - case BitstreamEntry::SubBlock: - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing %s: expecting records.", BlockName); - case BitstreamEntry::Record: - if (Error E = parseRecord(ParserHelper, Next->ID)) - return E; - continue; - } - } - // If we're here, it means we didn't get an END_BLOCK yet, but we're at the - // end of the stream. In this case, error. - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing %s: unterminated block.", BlockName); -} - -Error BitstreamMetaParserHelper::parse() { - return parseBlock(*this, META_BLOCK_ID, "META_BLOCK"); -} +Error BitstreamRemarkParserHelper::parseNext() { + Type.reset(); + RemarkNameIdx.reset(); + PassNameIdx.reset(); + FunctionNameIdx.reset(); + Hotness.reset(); + Loc.reset(); + Args.clear(); -Error BitstreamRemarkParserHelper::parse() { - return parseBlock(*this, REMARK_BLOCK_ID, "REMARK_BLOCK"); + if (Error E = expectBlock()) + return E; + return parseBlock(); } BitstreamParserHelper::BitstreamParserHelper(StringRef Buffer) : Stream(Buffer) {} -Expected> BitstreamParserHelper::parseMagic() { +Error BitstreamParserHelper::expectMagic() { std::array Result; - for (unsigned i = 0; i < 4; ++i) + for (unsigned I = 0; I < 4; ++I) if (Expected R = Stream.Read(8)) - Result[i] = *R; + Result[I] = *R; else return R.takeError(); - return Result; + + StringRef MagicNumber{Result.data(), Result.size()}; + if (MagicNumber != remarks::ContainerMagic) + return error("Unknown magic number: expecting {}, got {}.", + remarks::ContainerMagic, MagicNumber); + return Error::success(); } Error BitstreamParserHelper::parseBlockInfoBlock() { @@ -225,8 +226,7 @@ Error BitstreamParserHelper::parseBlockInfoBlock() { 
return Next.takeError(); if (Next->Kind != BitstreamEntry::SubBlock || Next->ID != llvm::bitc::BLOCKINFO_BLOCK_ID) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), + return error( "Error while parsing BLOCKINFO_BLOCK: expecting [ENTER_SUBBLOCK, " "BLOCKINFO_BLOCK, ...]."); @@ -236,9 +236,7 @@ Error BitstreamParserHelper::parseBlockInfoBlock() { return MaybeBlockInfo.takeError(); if (!*MaybeBlockInfo) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCKINFO_BLOCK."); + return error("Missing BLOCKINFO_BLOCK."); BlockInfo = **MaybeBlockInfo; @@ -246,77 +244,17 @@ Error BitstreamParserHelper::parseBlockInfoBlock() { return Error::success(); } -static Expected isBlock(BitstreamCursor &Stream, unsigned BlockID) { - bool Result = false; - uint64_t PreviousBitNo = Stream.GetCurrentBitNo(); - Expected Next = Stream.advance(); - if (!Next) - return Next.takeError(); - switch (Next->Kind) { - case BitstreamEntry::SubBlock: - // Check for the block id. 
- Result = Next->ID == BlockID; - break; - case BitstreamEntry::Error: - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Unexpected error while parsing bitstream."); - default: - Result = false; - break; - } - if (Error E = Stream.JumpToBit(PreviousBitNo)) - return std::move(E); - return Result; -} - -Expected BitstreamParserHelper::isMetaBlock() { - return isBlock(Stream, META_BLOCK_ID); -} - -Expected BitstreamParserHelper::isRemarkBlock() { - return isBlock(Stream, META_BLOCK_ID); -} - -static Error validateMagicNumber(StringRef MagicNumber) { - if (MagicNumber != remarks::ContainerMagic) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown magic number: expecting %s, got %.4s.", - remarks::ContainerMagic.data(), MagicNumber.data()); - return Error::success(); -} - -static Error advanceToMetaBlock(BitstreamParserHelper &Helper) { - Expected> MagicNumber = Helper.parseMagic(); - if (!MagicNumber) - return MagicNumber.takeError(); - if (Error E = validateMagicNumber( - StringRef(MagicNumber->data(), MagicNumber->size()))) +Error BitstreamParserHelper::advanceToMetaBlock() { + if (Error E = expectMagic()) return E; - if (Error E = Helper.parseBlockInfoBlock()) + if (Error E = parseBlockInfoBlock()) return E; - Expected isMetaBlock = Helper.isMetaBlock(); - if (!isMetaBlock) - return isMetaBlock.takeError(); - if (!*isMetaBlock) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Expecting META_BLOCK after the BLOCKINFO_BLOCK."); return Error::success(); } Expected> remarks::createBitstreamParserFromMeta( StringRef Buf, std::optional ExternalFilePrependPath) { - BitstreamParserHelper Helper(Buf); - Expected> MagicNumber = Helper.parseMagic(); - if (!MagicNumber) - return MagicNumber.takeError(); - - if (Error E = validateMagicNumber( - StringRef(MagicNumber->data(), MagicNumber->size()))) - return std::move(E); - auto Parser = std::make_unique(Buf); if 
(ExternalFilePrependPath) @@ -339,13 +277,13 @@ Expected> BitstreamRemarkParser::next() { } Error BitstreamRemarkParser::parseMeta() { - // Advance and to the meta block. - if (Error E = advanceToMetaBlock(ParserHelper)) + if (Error E = ParserHelper.advanceToMetaBlock()) return E; - BitstreamMetaParserHelper MetaHelper(ParserHelper.Stream, - ParserHelper.BlockInfo); - if (Error E = MetaHelper.parse()) + BitstreamMetaParserHelper MetaHelper(ParserHelper.Stream); + if (Error E = MetaHelper.expectBlock()) + return E; + if (Error E = MetaHelper.parseBlock()) return E; if (Error E = processCommonMeta(MetaHelper)) @@ -364,59 +302,41 @@ Error BitstreamRemarkParser::parseMeta() { Error BitstreamRemarkParser::processCommonMeta( BitstreamMetaParserHelper &Helper) { - if (std::optional Version = Helper.ContainerVersion) - ContainerVersion = *Version; - else - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_META: missing container version."); - - if (std::optional Type = Helper.ContainerType) { - // Always >= BitstreamRemarkContainerType::First since it's unsigned. - if (*Type > static_cast(BitstreamRemarkContainerType::Last)) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_META: invalid container type."); - - ContainerType = static_cast(*Type); - } else - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_META: missing container type."); - + if (!Helper.Container) + return Helper.error("Missing container info."); + auto &Container = *Helper.Container; + ContainerVersion = Container.Version; + // Always >= BitstreamRemarkContainerType::First since it's unsigned. 
+ if (Container.Type > static_cast(BitstreamRemarkContainerType::Last)) + return Helper.error("Invalid container type."); + ContainerType = static_cast(Container.Type); return Error::success(); } -static Error processStrTab(BitstreamRemarkParser &P, - std::optional StrTabBuf) { - if (!StrTabBuf) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_META: missing string table."); +Error BitstreamRemarkParser::processStrTab(BitstreamMetaParserHelper &Helper) { + if (!Helper.StrTabBuf) + return Helper.error("Missing string table."); // Parse and assign the string table. - P.StrTab.emplace(*StrTabBuf); + StrTab.emplace(*Helper.StrTabBuf); return Error::success(); } -static Error processRemarkVersion(BitstreamRemarkParser &P, - std::optional RemarkVersion) { - if (!RemarkVersion) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_META: missing remark version."); - P.RemarkVersion = *RemarkVersion; +Error BitstreamRemarkParser::processRemarkVersion( + BitstreamMetaParserHelper &Helper) { + if (!Helper.RemarkVersion) + return Helper.error("Missing remark version."); + RemarkVersion = *Helper.RemarkVersion; return Error::success(); } Error BitstreamRemarkParser::processExternalFilePath( - std::optional ExternalFilePath) { - if (!ExternalFilePath) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_META: missing external file path."); + BitstreamMetaParserHelper &Helper) { + if (!Helper.ExternalFilePath) + return Helper.error("Missing external file path."); + StringRef ExternalFilePath = *Helper.ExternalFilePath; SmallString<80> FullPath(ExternalFilePrependPath); - sys::path::append(FullPath, *ExternalFilePath); + sys::path::append(FullPath, ExternalFilePath); // External file: open the external file, parse it, check if its metadata // matches the one from the separate metadata, then 
replace the current parser @@ -435,32 +355,22 @@ Error BitstreamRemarkParser::processExternalFilePath( // Create a separate parser used for parsing the separate file. ParserHelper = BitstreamParserHelper(TmpRemarkBuffer->getBuffer()); // Advance and check until we can parse the meta block. - if (Error E = advanceToMetaBlock(ParserHelper)) + if (Error E = ParserHelper.advanceToMetaBlock()) return E; // Parse the meta from the separate file. // Note: here we overwrite the BlockInfo with the one from the file. This will // be used to parse the rest of the file. - BitstreamMetaParserHelper SeparateMetaHelper(ParserHelper.Stream, - ParserHelper.BlockInfo); - if (Error E = SeparateMetaHelper.parse()) + BitstreamMetaParserHelper SeparateMetaHelper(ParserHelper.Stream); + if (Error E = SeparateMetaHelper.expectBlock()) + return E; + if (Error E = SeparateMetaHelper.parseBlock()) return E; - uint64_t PreviousContainerVersion = ContainerVersion; if (Error E = processCommonMeta(SeparateMetaHelper)) return E; if (ContainerType != BitstreamRemarkContainerType::SeparateRemarksFile) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing external file's BLOCK_META: wrong container " - "type."); - - if (PreviousContainerVersion != ContainerVersion) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing external file's BLOCK_META: mismatching versions: " - "original meta: %lu, external file meta: %lu.", - PreviousContainerVersion, ContainerVersion); + return SeparateMetaHelper.error("Wrong container type in external file."); // Process the meta from the separate file. 
return processSeparateRemarksFileMeta(SeparateMetaHelper); @@ -468,26 +378,26 @@ Error BitstreamRemarkParser::processExternalFilePath( Error BitstreamRemarkParser::processStandaloneMeta( BitstreamMetaParserHelper &Helper) { - if (Error E = processStrTab(*this, Helper.StrTabBuf)) + if (Error E = processStrTab(Helper)) return E; - return processRemarkVersion(*this, Helper.RemarkVersion); + return processRemarkVersion(Helper); } Error BitstreamRemarkParser::processSeparateRemarksFileMeta( BitstreamMetaParserHelper &Helper) { - return processRemarkVersion(*this, Helper.RemarkVersion); + return processRemarkVersion(Helper); } Error BitstreamRemarkParser::processSeparateRemarksMetaMeta( BitstreamMetaParserHelper &Helper) { - if (Error E = processStrTab(*this, Helper.StrTabBuf)) + if (Error E = processStrTab(Helper)) return E; - return processExternalFilePath(Helper.ExternalFilePath); + return processExternalFilePath(Helper); } Expected> BitstreamRemarkParser::parseRemark() { BitstreamRemarkParserHelper RemarkHelper(ParserHelper.Stream); - if (Error E = RemarkHelper.parse()) + if (Error E = RemarkHelper.parseNext()) return std::move(E); return processRemark(RemarkHelper); @@ -498,28 +408,20 @@ BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) { std::unique_ptr Result = std::make_unique(); Remark &R = *Result; - if (StrTab == std::nullopt) - return createStringError( - std::make_error_code(std::errc::invalid_argument), - "Error while parsing BLOCK_REMARK: missing string table."); + if (!StrTab) + return Helper.error("Missing string table."); if (!Helper.Type) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_REMARK: missing remark type."); + return Helper.error("Missing remark type."); // Always >= Type::First since it's unsigned. 
if (*Helper.Type > static_cast(Type::Last)) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_REMARK: unknown remark type."); + return Helper.error("Unknown remark type."); R.RemarkType = static_cast(*Helper.Type); if (!Helper.RemarkNameIdx) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_REMARK: missing remark name."); + return Helper.error("Missing remark name."); if (Expected RemarkName = (*StrTab)[*Helper.RemarkNameIdx]) R.RemarkName = *RemarkName; @@ -527,9 +429,7 @@ BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) { return RemarkName.takeError(); if (!Helper.PassNameIdx) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_REMARK: missing remark pass."); + return Helper.error("Missing remark pass."); if (Expected PassName = (*StrTab)[*Helper.PassNameIdx]) R.PassName = *PassName; @@ -537,61 +437,53 @@ BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) { return PassName.takeError(); if (!Helper.FunctionNameIdx) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_REMARK: missing remark function name."); + return Helper.error("Missing remark function name."); + if (Expected FunctionName = (*StrTab)[*Helper.FunctionNameIdx]) R.FunctionName = *FunctionName; else return FunctionName.takeError(); - if (Helper.SourceFileNameIdx && Helper.SourceLine && Helper.SourceColumn) { - Expected SourceFileName = (*StrTab)[*Helper.SourceFileNameIdx]; + if (Helper.Loc) { + Expected SourceFileName = + (*StrTab)[Helper.Loc->SourceFileNameIdx]; if (!SourceFileName) return SourceFileName.takeError(); R.Loc.emplace(); R.Loc->SourceFilePath = *SourceFileName; - R.Loc->SourceLine = *Helper.SourceLine; - R.Loc->SourceColumn = *Helper.SourceColumn; + R.Loc->SourceLine = 
Helper.Loc->SourceLine; + R.Loc->SourceColumn = Helper.Loc->SourceColumn; } if (Helper.Hotness) R.Hotness = *Helper.Hotness; - if (!Helper.Args) - return std::move(Result); - - for (const BitstreamRemarkParserHelper::Argument &Arg : *Helper.Args) { + for (const BitstreamRemarkParserHelper::Argument &Arg : Helper.Args) { if (!Arg.KeyIdx) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_REMARK: missing key in remark argument."); + return Helper.error("Missing key in remark argument."); if (!Arg.ValueIdx) - return createStringError( - std::make_error_code(std::errc::illegal_byte_sequence), - "Error while parsing BLOCK_REMARK: missing value in remark " - "argument."); + return Helper.error("Missing value in remark argument."); // We have at least a key and a value, create an entry. - R.Args.emplace_back(); + auto &RArg = R.Args.emplace_back(); if (Expected Key = (*StrTab)[*Arg.KeyIdx]) - R.Args.back().Key = *Key; + RArg.Key = *Key; else return Key.takeError(); if (Expected Value = (*StrTab)[*Arg.ValueIdx]) - R.Args.back().Val = *Value; + RArg.Val = *Value; else return Value.takeError(); - if (Arg.SourceFileNameIdx && Arg.SourceLine && Arg.SourceColumn) { + if (Arg.Loc) { if (Expected SourceFileName = - (*StrTab)[*Arg.SourceFileNameIdx]) { - R.Args.back().Loc.emplace(); - R.Args.back().Loc->SourceFilePath = *SourceFileName; - R.Args.back().Loc->SourceLine = *Arg.SourceLine; - R.Args.back().Loc->SourceColumn = *Arg.SourceColumn; + (*StrTab)[Arg.Loc->SourceFileNameIdx]) { + RArg.Loc.emplace(); + RArg.Loc->SourceFilePath = *SourceFileName; + RArg.Loc->SourceLine = Arg.Loc->SourceLine; + RArg.Loc->SourceColumn = Arg.Loc->SourceColumn; } else return SourceFileName.takeError(); } diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h index cba805dc24b59..257ac46eb9495 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.h +++ b/llvm/lib/Remarks/BitstreamRemarkParser.h @@ 
-13,14 +13,15 @@ #ifndef LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H #define LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkParser.h" +#include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Error.h" -#include +#include "llvm/Support/FormatVariadic.h" #include #include #include @@ -28,66 +29,156 @@ namespace llvm { namespace remarks { -struct Remark; +class BitstreamBlockParserHelperBase { +protected: + BitstreamCursor &Stream; + + StringRef BlockName; + unsigned BlockID; + +public: + BitstreamBlockParserHelperBase(BitstreamCursor &Stream, unsigned BlockID, + StringRef BlockName) + : Stream(Stream), BlockName(BlockName), BlockID(BlockID) {} + + template Error error(char const *Fmt, const Ts &...Vals) { + std::string Buffer; + raw_string_ostream OS(Buffer); + OS << "Error while parsing " << BlockName << " block: "; + OS << formatv(Fmt, Vals...); + return make_error( + std::move(Buffer), + std::make_error_code(std::errc::illegal_byte_sequence)); + } + + Error expectBlock(); + +protected: + Error enterBlock(); + + Error unknownRecord(unsigned AbbrevID); + Error unexpectedRecord(StringRef RecordName); + Error malformedRecord(StringRef RecordName); + Error unexpectedBlock(unsigned Code); +}; + +template +class BitstreamBlockParserHelper : public BitstreamBlockParserHelperBase { +protected: + using BitstreamBlockParserHelperBase::BitstreamBlockParserHelperBase; + Derived &derived() { return *static_cast(this); } + + /// Parse a record and fill in the fields in the parser. + /// The subclass can statically override this method. + Error parseRecord(unsigned Code) { return unexpectedRecord(Code); } + + /// Parse a subblock and fill in the fields in the parser. 
+ /// The subclass can statically override this method. + Error parseSubBlock(unsigned Code) { return unexpectedBlock(Code); } + +public: + /// Enter, parse, and leave this bitstream block. This expects the + /// BitstreamCursor to be right after the SubBlock entry (i.e. after calling + /// expectBlock). + Error parseBlock() { + if (Error E = enterBlock()) + return E; + + // Stop when there is nothing to read anymore or when we encounter an + // END_BLOCK. + while (true) { + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + switch (Next->Kind) { + case BitstreamEntry::SubBlock: + if (Error E = derived().parseSubBlock(Next->ID)) + return E; + continue; + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Record: + if (Error E = derived().parseRecord(Next->ID)) + return E; + continue; + case BitstreamEntry::Error: + return error("Unexpected end of bitstream."); + } + llvm_unreachable("Unexpected BitstreamEntry"); + } + } +}; /// Helper to parse a META_BLOCK for a bitstream remark container. -struct BitstreamMetaParserHelper { - /// The Bitstream reader. - BitstreamCursor &Stream; - /// Reference to the storage for the block info. - BitstreamBlockInfo &BlockInfo; - /// The parsed content: depending on the container type, some fields might be - /// empty. - std::optional ContainerVersion; - std::optional ContainerType; - std::optional StrTabBuf; - std::optional ExternalFilePath; +class BitstreamMetaParserHelper + : public BitstreamBlockParserHelper { + friend class BitstreamBlockParserHelper; + +public: + struct ContainerInfo { + uint64_t Version; + uint64_t Type; + }; + + /// The parsed content: depending on the container type, some fields might + /// be empty. + std::optional Container; std::optional RemarkVersion; + std::optional ExternalFilePath; + std::optional StrTabBuf; - /// Continue parsing with \p Stream. \p Stream is expected to contain a - /// ENTER_SUBBLOCK to the META_BLOCK at the current position. 
- /// \p Stream is expected to have a BLOCKINFO_BLOCK set. - BitstreamMetaParserHelper(BitstreamCursor &Stream, - BitstreamBlockInfo &BlockInfo); + BitstreamMetaParserHelper(BitstreamCursor &Stream) + : BitstreamBlockParserHelper(Stream, META_BLOCK_ID, MetaBlockName) {} - /// Parse the META_BLOCK and fill the available entries. - /// This helper does not check for the validity of the fields. - Error parse(); +protected: + Error parseRecord(unsigned Code); }; /// Helper to parse a REMARK_BLOCK for a bitstream remark container. -struct BitstreamRemarkParserHelper { - /// The Bitstream reader. - BitstreamCursor &Stream; +class BitstreamRemarkParserHelper + : public BitstreamBlockParserHelper { + friend class BitstreamBlockParserHelper; + +protected: + SmallVector Record; + StringRef RecordBlob; + unsigned RecordID; + +public: + struct RemarkLoc { + uint64_t SourceFileNameIdx; + uint64_t SourceLine; + uint64_t SourceColumn; + }; + + struct Argument { + std::optional KeyIdx; + std::optional ValueIdx; + std::optional Loc; + + Argument(std::optional KeyIdx, std::optional ValueIdx) + : KeyIdx(KeyIdx), ValueIdx(ValueIdx) {} + }; + /// The parsed content: depending on the remark, some fields might be empty. std::optional Type; std::optional RemarkNameIdx; std::optional PassNameIdx; std::optional FunctionNameIdx; - std::optional SourceFileNameIdx; - std::optional SourceLine; - std::optional SourceColumn; std::optional Hotness; - struct Argument { - std::optional KeyIdx; - std::optional ValueIdx; - std::optional SourceFileNameIdx; - std::optional SourceLine; - std::optional SourceColumn; - }; - std::optional> Args; - /// Avoid re-allocating a vector every time. - SmallVector TmpArgs; - - /// Continue parsing with \p Stream. \p Stream is expected to contain a - /// ENTER_SUBBLOCK to the REMARK_BLOCK at the current position. - /// \p Stream is expected to have a BLOCKINFO_BLOCK set and to have already - /// parsed the META_BLOCK. 
- BitstreamRemarkParserHelper(BitstreamCursor &Stream); - - /// Parse the REMARK_BLOCK and fill the available entries. - /// This helper does not check for the validity of the fields. - Error parse(); + std::optional Loc; + + SmallVector Args; + + BitstreamRemarkParserHelper(BitstreamCursor &Stream) + : BitstreamBlockParserHelper(Stream, REMARK_BLOCK_ID, RemarkBlockName) {} + + /// Clear helper state and parse next remark block. + Error parseNext(); + +protected: + Error parseRecord(unsigned Code); + Error handleRecord(); }; /// Helper to parse any bitstream remark container. @@ -98,21 +189,15 @@ struct BitstreamParserHelper { BitstreamBlockInfo BlockInfo; /// Start parsing at \p Buffer. BitstreamParserHelper(StringRef Buffer); - /// Parse the magic number. - Expected> parseMagic(); + /// Parse and validate the magic number. + Error expectMagic(); + /// Advance to the meta block + Error advanceToMetaBlock(); /// Parse the block info block containing all the abbrevs. /// This needs to be called before calling any other parsing function. Error parseBlockInfoBlock(); - /// Return true if the next block is a META_BLOCK. This function does not move - /// the cursor. - Expected isMetaBlock(); - /// Return true if the next block is a REMARK_BLOCK. This function does not - /// move the cursor. - Expected isRemarkBlock(); /// Return true if the parser reached the end of the stream. bool atEndOfStream() { return Stream.AtEndOfStream(); } - /// Jump to the end of the stream, skipping everything. - void skipToEnd() { return Stream.skipToEnd(); } }; /// Parses and holds the state of the latest parsed remark. @@ -149,14 +234,16 @@ struct BitstreamRemarkParser : public RemarkParser { Expected> parseRemark(); private: - /// Helper functions. 
Error processCommonMeta(BitstreamMetaParserHelper &Helper); Error processStandaloneMeta(BitstreamMetaParserHelper &Helper); Error processSeparateRemarksFileMeta(BitstreamMetaParserHelper &Helper); Error processSeparateRemarksMetaMeta(BitstreamMetaParserHelper &Helper); + Error processExternalFilePath(BitstreamMetaParserHelper &Helper); + Error processStrTab(BitstreamMetaParserHelper &Helper); + Error processRemarkVersion(BitstreamMetaParserHelper &Helper); + Expected> processRemark(BitstreamRemarkParserHelper &Helper); - Error processExternalFilePath(std::optional ExternalFilePath); }; Expected> createBitstreamParserFromMeta( From 271740c8baa08ad2451111f9d49d7cc3db92d199 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 15 Sep 2025 15:25:01 +0100 Subject: [PATCH 345/734] [NFC] Fix "shift implicitly converted" warning introduced by #154761 (#158619) --- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 235dbc41c4bef..54bdb8750f709 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -947,7 +947,7 @@ bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) { if (!isa(N)) return false; - int64_t MulImm = 1 << cast(N)->getSExtValue(); + int64_t MulImm = 1LL << cast(N)->getSExtValue(); if (MulImm >= Low && MulImm <= High) { Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32); return true; From 4bf0001c0738efa5902287011c63650bac8791b5 Mon Sep 17 00:00:00 2001 From: zhijian lin Date: Mon, 15 Sep 2025 10:26:01 -0400 Subject: [PATCH 346/734] [PowerPC][NFC] Pre-commit test case: Implement a more efficient memcmp in cases where the length is known (#158367) The newly added test case will be used to verify a more efficient memcmp in cases where the length is known. 
--- llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll | 130 ++++++++++++++++++ llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll | 98 +++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll create mode 100644 llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll diff --git a/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll new file mode 100644 index 0000000000000..f5483ad2a7c3f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P8 + +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P10 + +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P8 + +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P10 + +define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) { +; CHECK-AIX32-P8-LABEL: cmpeq16: +; CHECK-AIX32-P8: # %bb.0: # %entry +; CHECK-AIX32-P8-NEXT: lwz r5, 4(r3) +; CHECK-AIX32-P8-NEXT: lwz r6, 0(r3) +; CHECK-AIX32-P8-NEXT: lwz r7, 4(r4) +; CHECK-AIX32-P8-NEXT: lwz r8, 0(r4) +; CHECK-AIX32-P8-NEXT: xor r6, r6, r8 +; CHECK-AIX32-P8-NEXT: xor r5, r5, r7 +; CHECK-AIX32-P8-NEXT: or. 
r5, r5, r6 +; CHECK-AIX32-P8-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX32-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX32-P8-NEXT: lwz r5, 12(r3) +; CHECK-AIX32-P8-NEXT: lwz r3, 8(r3) +; CHECK-AIX32-P8-NEXT: lwz r6, 12(r4) +; CHECK-AIX32-P8-NEXT: lwz r4, 8(r4) +; CHECK-AIX32-P8-NEXT: xor r3, r3, r4 +; CHECK-AIX32-P8-NEXT: xor r4, r5, r6 +; CHECK-AIX32-P8-NEXT: or. r3, r4, r3 +; CHECK-AIX32-P8-NEXT: li r3, 0 +; CHECK-AIX32-P8-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX32-P8-NEXT: L..BB0_2: # %res_block +; CHECK-AIX32-P8-NEXT: li r3, 1 +; CHECK-AIX32-P8-NEXT: L..BB0_3: # %endblock +; CHECK-AIX32-P8-NEXT: cntlzw r3, r3 +; CHECK-AIX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-AIX32-P8-NEXT: blr +; +; CHECK-AIX32-P10-LABEL: cmpeq16: +; CHECK-AIX32-P10: # %bb.0: # %entry +; CHECK-AIX32-P10-NEXT: lwz r5, 4(r3) +; CHECK-AIX32-P10-NEXT: lwz r6, 0(r3) +; CHECK-AIX32-P10-NEXT: lwz r7, 4(r4) +; CHECK-AIX32-P10-NEXT: xor r5, r5, r7 +; CHECK-AIX32-P10-NEXT: lwz r8, 0(r4) +; CHECK-AIX32-P10-NEXT: xor r6, r6, r8 +; CHECK-AIX32-P10-NEXT: or. r5, r5, r6 +; CHECK-AIX32-P10-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX32-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX32-P10-NEXT: lwz r5, 12(r3) +; CHECK-AIX32-P10-NEXT: lwz r3, 8(r3) +; CHECK-AIX32-P10-NEXT: lwz r6, 12(r4) +; CHECK-AIX32-P10-NEXT: lwz r4, 8(r4) +; CHECK-AIX32-P10-NEXT: xor r3, r3, r4 +; CHECK-AIX32-P10-NEXT: xor r4, r5, r6 +; CHECK-AIX32-P10-NEXT: or. 
r3, r4, r3 +; CHECK-AIX32-P10-NEXT: li r3, 0 +; CHECK-AIX32-P10-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX32-P10-NEXT: L..BB0_2: # %res_block +; CHECK-AIX32-P10-NEXT: li r3, 1 +; CHECK-AIX32-P10-NEXT: L..BB0_3: # %endblock +; CHECK-AIX32-P10-NEXT: cntlzw r3, r3 +; CHECK-AIX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-AIX32-P10-NEXT: blr +; +; CHECK-LINUX32-P8-LABEL: cmpeq16: +; CHECK-LINUX32-P8: # %bb.0: # %entry +; CHECK-LINUX32-P8-NEXT: lwz r5, 0(r3) +; CHECK-LINUX32-P8-NEXT: lwz r6, 4(r3) +; CHECK-LINUX32-P8-NEXT: lwz r7, 0(r4) +; CHECK-LINUX32-P8-NEXT: lwz r8, 4(r4) +; CHECK-LINUX32-P8-NEXT: xor r6, r6, r8 +; CHECK-LINUX32-P8-NEXT: xor r5, r5, r7 +; CHECK-LINUX32-P8-NEXT: or. r5, r5, r6 +; CHECK-LINUX32-P8-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX32-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX32-P8-NEXT: lwz r5, 8(r3) +; CHECK-LINUX32-P8-NEXT: lwz r3, 12(r3) +; CHECK-LINUX32-P8-NEXT: lwz r6, 8(r4) +; CHECK-LINUX32-P8-NEXT: lwz r4, 12(r4) +; CHECK-LINUX32-P8-NEXT: xor r3, r3, r4 +; CHECK-LINUX32-P8-NEXT: xor r4, r5, r6 +; CHECK-LINUX32-P8-NEXT: or. r3, r4, r3 +; CHECK-LINUX32-P8-NEXT: li r3, 0 +; CHECK-LINUX32-P8-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX32-P8-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX32-P8-NEXT: li r3, 1 +; CHECK-LINUX32-P8-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX32-P8-NEXT: cntlzw r3, r3 +; CHECK-LINUX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-LINUX32-P8-NEXT: blr +; +; CHECK-LINUX32-P10-LABEL: cmpeq16: +; CHECK-LINUX32-P10: # %bb.0: # %entry +; CHECK-LINUX32-P10-NEXT: lwz r5, 0(r3) +; CHECK-LINUX32-P10-NEXT: lwz r6, 4(r3) +; CHECK-LINUX32-P10-NEXT: lwz r7, 0(r4) +; CHECK-LINUX32-P10-NEXT: xor r5, r5, r7 +; CHECK-LINUX32-P10-NEXT: lwz r8, 4(r4) +; CHECK-LINUX32-P10-NEXT: xor r6, r6, r8 +; CHECK-LINUX32-P10-NEXT: or. 
r5, r5, r6 +; CHECK-LINUX32-P10-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX32-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX32-P10-NEXT: lwz r5, 8(r3) +; CHECK-LINUX32-P10-NEXT: lwz r3, 12(r3) +; CHECK-LINUX32-P10-NEXT: lwz r6, 8(r4) +; CHECK-LINUX32-P10-NEXT: lwz r4, 12(r4) +; CHECK-LINUX32-P10-NEXT: xor r3, r3, r4 +; CHECK-LINUX32-P10-NEXT: xor r4, r5, r6 +; CHECK-LINUX32-P10-NEXT: or. r3, r4, r3 +; CHECK-LINUX32-P10-NEXT: li r3, 0 +; CHECK-LINUX32-P10-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX32-P10-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX32-P10-NEXT: li r3, 1 +; CHECK-LINUX32-P10-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX32-P10-NEXT: cntlzw r3, r3 +; CHECK-LINUX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-LINUX32-P10-NEXT: blr +entry: + %bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i32 16) + %cmp = icmp eq i32 %bcmp, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +declare signext i32 @bcmp(ptr captures(none), ptr captures(none), i32) + diff --git a/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll new file mode 100644 index 0000000000000..216b7638642d4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P8 + +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P10 + +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P8 + +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P10 + +define dso_local signext range(i32 0, 2) i32 
@cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) { +; CHECK-AIX64-32-P8-LABEL: cmpeq16: +; CHECK-AIX64-32-P8: # %bb.0: # %entry +; CHECK-AIX64-32-P8-NEXT: ld r5, 0(r3) +; CHECK-AIX64-32-P8-NEXT: ld r6, 0(r4) +; CHECK-AIX64-32-P8-NEXT: cmpld r5, r6 +; CHECK-AIX64-32-P8-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX64-32-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX64-32-P8-NEXT: ld r5, 8(r3) +; CHECK-AIX64-32-P8-NEXT: ld r4, 8(r4) +; CHECK-AIX64-32-P8-NEXT: li r3, 0 +; CHECK-AIX64-32-P8-NEXT: cmpld r5, r4 +; CHECK-AIX64-32-P8-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX64-32-P8-NEXT: L..BB0_2: # %res_block +; CHECK-AIX64-32-P8-NEXT: li r3, 1 +; CHECK-AIX64-32-P8-NEXT: L..BB0_3: # %endblock +; CHECK-AIX64-32-P8-NEXT: cntlzw r3, r3 +; CHECK-AIX64-32-P8-NEXT: srwi r3, r3, 5 +; CHECK-AIX64-32-P8-NEXT: blr +; +; CHECK-AIX64-32-P10-LABEL: cmpeq16: +; CHECK-AIX64-32-P10: # %bb.0: # %entry +; CHECK-AIX64-32-P10-NEXT: ld r5, 0(r3) +; CHECK-AIX64-32-P10-NEXT: ld r6, 0(r4) +; CHECK-AIX64-32-P10-NEXT: cmpld r5, r6 +; CHECK-AIX64-32-P10-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX64-32-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX64-32-P10-NEXT: ld r5, 8(r3) +; CHECK-AIX64-32-P10-NEXT: ld r4, 8(r4) +; CHECK-AIX64-32-P10-NEXT: li r3, 0 +; CHECK-AIX64-32-P10-NEXT: cmpld r5, r4 +; CHECK-AIX64-32-P10-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX64-32-P10-NEXT: L..BB0_2: # %res_block +; CHECK-AIX64-32-P10-NEXT: li r3, 1 +; CHECK-AIX64-32-P10-NEXT: L..BB0_3: # %endblock +; CHECK-AIX64-32-P10-NEXT: cntlzw r3, r3 +; CHECK-AIX64-32-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-AIX64-32-P10-NEXT: blr +; +; CHECK-LINUX64-P8-LABEL: cmpeq16: +; CHECK-LINUX64-P8: # %bb.0: # %entry +; CHECK-LINUX64-P8-NEXT: ld r5, 0(r3) +; CHECK-LINUX64-P8-NEXT: ld r6, 0(r4) +; CHECK-LINUX64-P8-NEXT: cmpld r5, r6 +; CHECK-LINUX64-P8-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX64-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX64-P8-NEXT: ld r5, 8(r3) +; CHECK-LINUX64-P8-NEXT: ld r4, 8(r4) +; CHECK-LINUX64-P8-NEXT: li r3, 0 
+; CHECK-LINUX64-P8-NEXT: cmpld r5, r4 +; CHECK-LINUX64-P8-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX64-P8-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX64-P8-NEXT: li r3, 1 +; CHECK-LINUX64-P8-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX64-P8-NEXT: cntlzw r3, r3 +; CHECK-LINUX64-P8-NEXT: srwi r3, r3, 5 +; CHECK-LINUX64-P8-NEXT: blr +; +; CHECK-LINUX64-P10-LABEL: cmpeq16: +; CHECK-LINUX64-P10: # %bb.0: # %entry +; CHECK-LINUX64-P10-NEXT: ld r5, 0(r3) +; CHECK-LINUX64-P10-NEXT: ld r6, 0(r4) +; CHECK-LINUX64-P10-NEXT: cmpld r5, r6 +; CHECK-LINUX64-P10-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX64-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX64-P10-NEXT: ld r5, 8(r3) +; CHECK-LINUX64-P10-NEXT: ld r4, 8(r4) +; CHECK-LINUX64-P10-NEXT: li r3, 0 +; CHECK-LINUX64-P10-NEXT: cmpld r5, r4 +; CHECK-LINUX64-P10-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX64-P10-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX64-P10-NEXT: li r3, 1 +; CHECK-LINUX64-P10-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX64-P10-NEXT: cntlzw r3, r3 +; CHECK-LINUX64-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-LINUX64-P10-NEXT: blr +entry: + %bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i64 16) + %cmp = icmp eq i32 %bcmp, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +declare signext i32 @bcmp(ptr captures(none), ptr captures(none), i64) + From 3c75065765c96ebf4f7f2f66eb21e45fb4d74704 Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Mon, 15 Sep 2025 16:31:48 +0200 Subject: [PATCH 347/734] Revert "[Remarks] BitstreamRemarkParser: Refactor error handling" (#158647) Reverts llvm/llvm-project#156511. Build failure not caught by pre-commit CI. 
--- llvm/lib/Remarks/BitstreamRemarkParser.cpp | 502 +++++++++++++-------- llvm/lib/Remarks/BitstreamRemarkParser.h | 207 +++------ 2 files changed, 365 insertions(+), 344 deletions(-) diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/llvm/lib/Remarks/BitstreamRemarkParser.cpp index d40b40dfb2ba0..86a6c6dffb187 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkParser.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "BitstreamRemarkParser.h" +#include "llvm/Remarks/Remark.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include @@ -19,68 +20,27 @@ using namespace llvm; using namespace llvm::remarks; -namespace { - -template Error error(char const *Fmt, const Ts &...Vals) { - std::string Buffer; - raw_string_ostream OS(Buffer); - OS << formatv(Fmt, Vals...); - return make_error( - std::move(Buffer), - std::make_error_code(std::errc::illegal_byte_sequence)); -} - -} // namespace - -Error BitstreamBlockParserHelperBase::unknownRecord(unsigned AbbrevID) { - return error("Unknown record entry ({}).", AbbrevID); -} - -Error BitstreamBlockParserHelperBase::unexpectedRecord(StringRef RecordName) { - return error("Unexpected record entry ({}).", RecordName); -} - -Error BitstreamBlockParserHelperBase::malformedRecord(StringRef RecordName) { - return error("Malformed record entry ({}).", RecordName); -} - -Error BitstreamBlockParserHelperBase::unexpectedBlock(unsigned Code) { - return error("Unexpected subblock ({}).", Code); +static Error unknownRecord(const char *BlockName, unsigned RecordID) { + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: unknown record entry (%lu).", BlockName, + RecordID); } -static Expected expectSubBlock(BitstreamCursor &Stream) { - Expected Next = Stream.advance(); - if (!Next) - return Next.takeError(); - switch (Next->Kind) { - case 
BitstreamEntry::SubBlock: - return Next->ID; - case BitstreamEntry::Record: - case BitstreamEntry::EndBlock: - return error("Expected subblock, but got unexpected record."); - case BitstreamEntry::Error: - return error("Expected subblock, but got unexpected end of bitstream."); - } - llvm_unreachable("Unexpected BitstreamEntry"); +static Error malformedRecord(const char *BlockName, const char *RecordName) { + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: malformed record entry (%s).", BlockName, + RecordName); } -Error BitstreamBlockParserHelperBase::expectBlock() { - auto MaybeBlockID = expectSubBlock(Stream); - if (!MaybeBlockID) - return MaybeBlockID.takeError(); - if (*MaybeBlockID != BlockID) - return error("Expected {} block, but got unexpected block ({}).", BlockName, - *MaybeBlockID); - return Error::success(); -} +BitstreamMetaParserHelper::BitstreamMetaParserHelper( + BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo) + : Stream(Stream), BlockInfo(BlockInfo) {} -Error BitstreamBlockParserHelperBase::enterBlock() { - if (Stream.EnterSubBlock(BlockID)) - return error("Error while entering {} block.", BlockName); - return Error::success(); -} - -Error BitstreamMetaParserHelper::parseRecord(unsigned Code) { +/// Parse a record and fill in the fields in the parser. +static Error parseRecord(BitstreamMetaParserHelper &Parser, unsigned Code) { + BitstreamCursor &Stream = Parser.Stream; // Note: 2 is used here because it's the max number of fields we have per // record. SmallVector Record; @@ -92,132 +52,171 @@ Error BitstreamMetaParserHelper::parseRecord(unsigned Code) { switch (*RecordID) { case RECORD_META_CONTAINER_INFO: { if (Record.size() != 2) - return malformedRecord(MetaContainerInfoName); - Container = {Record[0], Record[1]}; - // Error immediately if container version is outdated, so the user sees an - // explanation instead of a parser error. 
- if (Container->Version != CurrentContainerVersion) { - return ::error( - "Unsupported remark container version (expected: {}, read: {}). " - "Please upgrade/downgrade your toolchain to read this container.", - CurrentContainerVersion, Container->Version); - } + return malformedRecord("BLOCK_META", "RECORD_META_CONTAINER_INFO"); + Parser.ContainerVersion = Record[0]; + Parser.ContainerType = Record[1]; break; } case RECORD_META_REMARK_VERSION: { if (Record.size() != 1) - return malformedRecord(MetaRemarkVersionName); - RemarkVersion = Record[0]; - // Error immediately if remark version is outdated, so the user sees an - // explanation instead of a parser error. - if (*RemarkVersion != CurrentRemarkVersion) { - return ::error( - "Unsupported remark version in container (expected: {}, read: {}). " - "Please upgrade/downgrade your toolchain to read this container.", - CurrentRemarkVersion, *RemarkVersion); - } + return malformedRecord("BLOCK_META", "RECORD_META_REMARK_VERSION"); + Parser.RemarkVersion = Record[0]; break; } case RECORD_META_STRTAB: { if (Record.size() != 0) - return malformedRecord(MetaStrTabName); - StrTabBuf = Blob; + return malformedRecord("BLOCK_META", "RECORD_META_STRTAB"); + Parser.StrTabBuf = Blob; break; } case RECORD_META_EXTERNAL_FILE: { if (Record.size() != 0) - return malformedRecord(MetaExternalFileName); - ExternalFilePath = Blob; + return malformedRecord("BLOCK_META", "RECORD_META_EXTERNAL_FILE"); + Parser.ExternalFilePath = Blob; break; } default: - return unknownRecord(*RecordID); + return unknownRecord("BLOCK_META", *RecordID); } return Error::success(); } -Error BitstreamRemarkParserHelper::parseRecord(unsigned Code) { - Record.clear(); - Expected MaybeRecordID = - Stream.readRecord(Code, Record, &RecordBlob); - if (!MaybeRecordID) - return MaybeRecordID.takeError(); - RecordID = *MaybeRecordID; - return handleRecord(); -} +BitstreamRemarkParserHelper::BitstreamRemarkParserHelper( + BitstreamCursor &Stream) + : Stream(Stream) {} 
-Error BitstreamRemarkParserHelper::handleRecord() { - switch (RecordID) { +/// Parse a record and fill in the fields in the parser. +static Error parseRecord(BitstreamRemarkParserHelper &Parser, unsigned Code) { + BitstreamCursor &Stream = Parser.Stream; + // Note: 5 is used here because it's the max number of fields we have per + // record. + SmallVector Record; + StringRef Blob; + Expected RecordID = Stream.readRecord(Code, Record, &Blob); + if (!RecordID) + return RecordID.takeError(); + + switch (*RecordID) { case RECORD_REMARK_HEADER: { if (Record.size() != 4) - return malformedRecord(RemarkHeaderName); - Type = Record[0]; - RemarkNameIdx = Record[1]; - PassNameIdx = Record[2]; - FunctionNameIdx = Record[3]; + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HEADER"); + Parser.Type = Record[0]; + Parser.RemarkNameIdx = Record[1]; + Parser.PassNameIdx = Record[2]; + Parser.FunctionNameIdx = Record[3]; break; } case RECORD_REMARK_DEBUG_LOC: { if (Record.size() != 3) - return malformedRecord(RemarkDebugLocName); - Loc = {Record[0], Record[1], Record[2]}; + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_DEBUG_LOC"); + Parser.SourceFileNameIdx = Record[0]; + Parser.SourceLine = Record[1]; + Parser.SourceColumn = Record[2]; break; } case RECORD_REMARK_HOTNESS: { if (Record.size() != 1) - return malformedRecord(RemarkHotnessName); - Hotness = Record[0]; + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HOTNESS"); + Parser.Hotness = Record[0]; break; } case RECORD_REMARK_ARG_WITH_DEBUGLOC: { if (Record.size() != 5) - return malformedRecord(RemarkArgWithDebugLocName); - auto &Arg = Args.emplace_back(Record[0], Record[1]); - Arg.Loc = {Record[2], Record[3], Record[4]}; + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_ARG_WITH_DEBUGLOC"); + // Create a temporary argument. Use that as a valid memory location for this + // argument entry. 
+ Parser.TmpArgs.emplace_back(); + Parser.TmpArgs.back().KeyIdx = Record[0]; + Parser.TmpArgs.back().ValueIdx = Record[1]; + Parser.TmpArgs.back().SourceFileNameIdx = Record[2]; + Parser.TmpArgs.back().SourceLine = Record[3]; + Parser.TmpArgs.back().SourceColumn = Record[4]; + Parser.Args = + ArrayRef(Parser.TmpArgs); break; } case RECORD_REMARK_ARG_WITHOUT_DEBUGLOC: { if (Record.size() != 2) - return malformedRecord(RemarkArgWithoutDebugLocName); - Args.emplace_back(Record[0], Record[1]); + return malformedRecord("BLOCK_REMARK", + "RECORD_REMARK_ARG_WITHOUT_DEBUGLOC"); + // Create a temporary argument. Use that as a valid memory location for this + // argument entry. + Parser.TmpArgs.emplace_back(); + Parser.TmpArgs.back().KeyIdx = Record[0]; + Parser.TmpArgs.back().ValueIdx = Record[1]; + Parser.Args = + ArrayRef(Parser.TmpArgs); break; } default: - return unknownRecord(RecordID); + return unknownRecord("BLOCK_REMARK", *RecordID); } return Error::success(); } -Error BitstreamRemarkParserHelper::parseNext() { - Type.reset(); - RemarkNameIdx.reset(); - PassNameIdx.reset(); - FunctionNameIdx.reset(); - Hotness.reset(); - Loc.reset(); - Args.clear(); +template +static Error parseBlock(T &ParserHelper, unsigned BlockID, + const char *BlockName) { + BitstreamCursor &Stream = ParserHelper.Stream; + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + if (Next->Kind != BitstreamEntry::SubBlock || Next->ID != BlockID) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: expecting [ENTER_SUBBLOCK, %s, ...].", + BlockName, BlockName); + if (Stream.EnterSubBlock(BlockID)) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while entering %s.", BlockName); + + // Stop when there is nothing to read anymore or when we encounter an + // END_BLOCK. 
+ while (!Stream.AtEndOfStream()) { + Next = Stream.advance(); + if (!Next) + return Next.takeError(); + switch (Next->Kind) { + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Error: + case BitstreamEntry::SubBlock: + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: expecting records.", BlockName); + case BitstreamEntry::Record: + if (Error E = parseRecord(ParserHelper, Next->ID)) + return E; + continue; + } + } + // If we're here, it means we didn't get an END_BLOCK yet, but we're at the + // end of the stream. In this case, error. + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: unterminated block.", BlockName); +} + +Error BitstreamMetaParserHelper::parse() { + return parseBlock(*this, META_BLOCK_ID, "META_BLOCK"); +} - if (Error E = expectBlock()) - return E; - return parseBlock(); +Error BitstreamRemarkParserHelper::parse() { + return parseBlock(*this, REMARK_BLOCK_ID, "REMARK_BLOCK"); } BitstreamParserHelper::BitstreamParserHelper(StringRef Buffer) : Stream(Buffer) {} -Error BitstreamParserHelper::expectMagic() { +Expected> BitstreamParserHelper::parseMagic() { std::array Result; - for (unsigned I = 0; I < 4; ++I) + for (unsigned i = 0; i < 4; ++i) if (Expected R = Stream.Read(8)) - Result[I] = *R; + Result[i] = *R; else return R.takeError(); - - StringRef MagicNumber{Result.data(), Result.size()}; - if (MagicNumber != remarks::ContainerMagic) - return error("Unknown magic number: expecting {}, got {}.", - remarks::ContainerMagic, MagicNumber); - return Error::success(); + return Result; } Error BitstreamParserHelper::parseBlockInfoBlock() { @@ -226,7 +225,8 @@ Error BitstreamParserHelper::parseBlockInfoBlock() { return Next.takeError(); if (Next->Kind != BitstreamEntry::SubBlock || Next->ID != llvm::bitc::BLOCKINFO_BLOCK_ID) - return error( + return createStringError( + 
std::make_error_code(std::errc::illegal_byte_sequence), "Error while parsing BLOCKINFO_BLOCK: expecting [ENTER_SUBBLOCK, " "BLOCKINFO_BLOCK, ...]."); @@ -236,7 +236,9 @@ Error BitstreamParserHelper::parseBlockInfoBlock() { return MaybeBlockInfo.takeError(); if (!*MaybeBlockInfo) - return error("Missing BLOCKINFO_BLOCK."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCKINFO_BLOCK."); BlockInfo = **MaybeBlockInfo; @@ -244,17 +246,77 @@ Error BitstreamParserHelper::parseBlockInfoBlock() { return Error::success(); } -Error BitstreamParserHelper::advanceToMetaBlock() { - if (Error E = expectMagic()) +static Expected isBlock(BitstreamCursor &Stream, unsigned BlockID) { + bool Result = false; + uint64_t PreviousBitNo = Stream.GetCurrentBitNo(); + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + switch (Next->Kind) { + case BitstreamEntry::SubBlock: + // Check for the block id. + Result = Next->ID == BlockID; + break; + case BitstreamEntry::Error: + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Unexpected error while parsing bitstream."); + default: + Result = false; + break; + } + if (Error E = Stream.JumpToBit(PreviousBitNo)) + return std::move(E); + return Result; +} + +Expected BitstreamParserHelper::isMetaBlock() { + return isBlock(Stream, META_BLOCK_ID); +} + +Expected BitstreamParserHelper::isRemarkBlock() { + return isBlock(Stream, META_BLOCK_ID); +} + +static Error validateMagicNumber(StringRef MagicNumber) { + if (MagicNumber != remarks::ContainerMagic) + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown magic number: expecting %s, got %.4s.", + remarks::ContainerMagic.data(), MagicNumber.data()); + return Error::success(); +} + +static Error advanceToMetaBlock(BitstreamParserHelper &Helper) { + Expected> MagicNumber = Helper.parseMagic(); + if (!MagicNumber) + return MagicNumber.takeError(); + 
if (Error E = validateMagicNumber( + StringRef(MagicNumber->data(), MagicNumber->size()))) return E; - if (Error E = parseBlockInfoBlock()) + if (Error E = Helper.parseBlockInfoBlock()) return E; + Expected isMetaBlock = Helper.isMetaBlock(); + if (!isMetaBlock) + return isMetaBlock.takeError(); + if (!*isMetaBlock) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Expecting META_BLOCK after the BLOCKINFO_BLOCK."); return Error::success(); } Expected> remarks::createBitstreamParserFromMeta( StringRef Buf, std::optional ExternalFilePrependPath) { + BitstreamParserHelper Helper(Buf); + Expected> MagicNumber = Helper.parseMagic(); + if (!MagicNumber) + return MagicNumber.takeError(); + + if (Error E = validateMagicNumber( + StringRef(MagicNumber->data(), MagicNumber->size()))) + return std::move(E); + auto Parser = std::make_unique(Buf); if (ExternalFilePrependPath) @@ -277,13 +339,13 @@ Expected> BitstreamRemarkParser::next() { } Error BitstreamRemarkParser::parseMeta() { - if (Error E = ParserHelper.advanceToMetaBlock()) + // Advance and to the meta block. + if (Error E = advanceToMetaBlock(ParserHelper)) return E; - BitstreamMetaParserHelper MetaHelper(ParserHelper.Stream); - if (Error E = MetaHelper.expectBlock()) - return E; - if (Error E = MetaHelper.parseBlock()) + BitstreamMetaParserHelper MetaHelper(ParserHelper.Stream, + ParserHelper.BlockInfo); + if (Error E = MetaHelper.parse()) return E; if (Error E = processCommonMeta(MetaHelper)) @@ -302,41 +364,59 @@ Error BitstreamRemarkParser::parseMeta() { Error BitstreamRemarkParser::processCommonMeta( BitstreamMetaParserHelper &Helper) { - if (!Helper.Container) - return Helper.error("Missing container info."); - auto &Container = *Helper.Container; - ContainerVersion = Container.Version; - // Always >= BitstreamRemarkContainerType::First since it's unsigned. 
- if (Container.Type > static_cast(BitstreamRemarkContainerType::Last)) - return Helper.error("Invalid container type."); - ContainerType = static_cast(Container.Type); + if (std::optional Version = Helper.ContainerVersion) + ContainerVersion = *Version; + else + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing container version."); + + if (std::optional Type = Helper.ContainerType) { + // Always >= BitstreamRemarkContainerType::First since it's unsigned. + if (*Type > static_cast(BitstreamRemarkContainerType::Last)) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: invalid container type."); + + ContainerType = static_cast(*Type); + } else + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing container type."); + return Error::success(); } -Error BitstreamRemarkParser::processStrTab(BitstreamMetaParserHelper &Helper) { - if (!Helper.StrTabBuf) - return Helper.error("Missing string table."); +static Error processStrTab(BitstreamRemarkParser &P, + std::optional StrTabBuf) { + if (!StrTabBuf) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing string table."); // Parse and assign the string table. 
- StrTab.emplace(*Helper.StrTabBuf); + P.StrTab.emplace(*StrTabBuf); return Error::success(); } -Error BitstreamRemarkParser::processRemarkVersion( - BitstreamMetaParserHelper &Helper) { - if (!Helper.RemarkVersion) - return Helper.error("Missing remark version."); - RemarkVersion = *Helper.RemarkVersion; +static Error processRemarkVersion(BitstreamRemarkParser &P, + std::optional RemarkVersion) { + if (!RemarkVersion) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing remark version."); + P.RemarkVersion = *RemarkVersion; return Error::success(); } Error BitstreamRemarkParser::processExternalFilePath( - BitstreamMetaParserHelper &Helper) { - if (!Helper.ExternalFilePath) - return Helper.error("Missing external file path."); - StringRef ExternalFilePath = *Helper.ExternalFilePath; + std::optional ExternalFilePath) { + if (!ExternalFilePath) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing external file path."); SmallString<80> FullPath(ExternalFilePrependPath); - sys::path::append(FullPath, ExternalFilePath); + sys::path::append(FullPath, *ExternalFilePath); // External file: open the external file, parse it, check if its metadata // matches the one from the separate metadata, then replace the current parser @@ -355,22 +435,32 @@ Error BitstreamRemarkParser::processExternalFilePath( // Create a separate parser used for parsing the separate file. ParserHelper = BitstreamParserHelper(TmpRemarkBuffer->getBuffer()); // Advance and check until we can parse the meta block. - if (Error E = ParserHelper.advanceToMetaBlock()) + if (Error E = advanceToMetaBlock(ParserHelper)) return E; // Parse the meta from the separate file. // Note: here we overwrite the BlockInfo with the one from the file. This will // be used to parse the rest of the file. 
- BitstreamMetaParserHelper SeparateMetaHelper(ParserHelper.Stream); - if (Error E = SeparateMetaHelper.expectBlock()) - return E; - if (Error E = SeparateMetaHelper.parseBlock()) + BitstreamMetaParserHelper SeparateMetaHelper(ParserHelper.Stream, + ParserHelper.BlockInfo); + if (Error E = SeparateMetaHelper.parse()) return E; + uint64_t PreviousContainerVersion = ContainerVersion; if (Error E = processCommonMeta(SeparateMetaHelper)) return E; if (ContainerType != BitstreamRemarkContainerType::SeparateRemarksFile) - return SeparateMetaHelper.error("Wrong container type in external file."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing external file's BLOCK_META: wrong container " + "type."); + + if (PreviousContainerVersion != ContainerVersion) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing external file's BLOCK_META: mismatching versions: " + "original meta: %lu, external file meta: %lu.", + PreviousContainerVersion, ContainerVersion); // Process the meta from the separate file. 
return processSeparateRemarksFileMeta(SeparateMetaHelper); @@ -378,26 +468,26 @@ Error BitstreamRemarkParser::processExternalFilePath( Error BitstreamRemarkParser::processStandaloneMeta( BitstreamMetaParserHelper &Helper) { - if (Error E = processStrTab(Helper)) + if (Error E = processStrTab(*this, Helper.StrTabBuf)) return E; - return processRemarkVersion(Helper); + return processRemarkVersion(*this, Helper.RemarkVersion); } Error BitstreamRemarkParser::processSeparateRemarksFileMeta( BitstreamMetaParserHelper &Helper) { - return processRemarkVersion(Helper); + return processRemarkVersion(*this, Helper.RemarkVersion); } Error BitstreamRemarkParser::processSeparateRemarksMetaMeta( BitstreamMetaParserHelper &Helper) { - if (Error E = processStrTab(Helper)) + if (Error E = processStrTab(*this, Helper.StrTabBuf)) return E; - return processExternalFilePath(Helper); + return processExternalFilePath(Helper.ExternalFilePath); } Expected> BitstreamRemarkParser::parseRemark() { BitstreamRemarkParserHelper RemarkHelper(ParserHelper.Stream); - if (Error E = RemarkHelper.parseNext()) + if (Error E = RemarkHelper.parse()) return std::move(E); return processRemark(RemarkHelper); @@ -408,20 +498,28 @@ BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) { std::unique_ptr Result = std::make_unique(); Remark &R = *Result; - if (!StrTab) - return Helper.error("Missing string table."); + if (StrTab == std::nullopt) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Error while parsing BLOCK_REMARK: missing string table."); if (!Helper.Type) - return Helper.error("Missing remark type."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark type."); // Always >= Type::First since it's unsigned. 
if (*Helper.Type > static_cast(Type::Last)) - return Helper.error("Unknown remark type."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: unknown remark type."); R.RemarkType = static_cast(*Helper.Type); if (!Helper.RemarkNameIdx) - return Helper.error("Missing remark name."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark name."); if (Expected RemarkName = (*StrTab)[*Helper.RemarkNameIdx]) R.RemarkName = *RemarkName; @@ -429,7 +527,9 @@ BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) { return RemarkName.takeError(); if (!Helper.PassNameIdx) - return Helper.error("Missing remark pass."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark pass."); if (Expected PassName = (*StrTab)[*Helper.PassNameIdx]) R.PassName = *PassName; @@ -437,53 +537,61 @@ BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) { return PassName.takeError(); if (!Helper.FunctionNameIdx) - return Helper.error("Missing remark function name."); - + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark function name."); if (Expected FunctionName = (*StrTab)[*Helper.FunctionNameIdx]) R.FunctionName = *FunctionName; else return FunctionName.takeError(); - if (Helper.Loc) { - Expected SourceFileName = - (*StrTab)[Helper.Loc->SourceFileNameIdx]; + if (Helper.SourceFileNameIdx && Helper.SourceLine && Helper.SourceColumn) { + Expected SourceFileName = (*StrTab)[*Helper.SourceFileNameIdx]; if (!SourceFileName) return SourceFileName.takeError(); R.Loc.emplace(); R.Loc->SourceFilePath = *SourceFileName; - R.Loc->SourceLine = Helper.Loc->SourceLine; - R.Loc->SourceColumn = Helper.Loc->SourceColumn; + R.Loc->SourceLine = 
*Helper.SourceLine; + R.Loc->SourceColumn = *Helper.SourceColumn; } if (Helper.Hotness) R.Hotness = *Helper.Hotness; - for (const BitstreamRemarkParserHelper::Argument &Arg : Helper.Args) { + if (!Helper.Args) + return std::move(Result); + + for (const BitstreamRemarkParserHelper::Argument &Arg : *Helper.Args) { if (!Arg.KeyIdx) - return Helper.error("Missing key in remark argument."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing key in remark argument."); if (!Arg.ValueIdx) - return Helper.error("Missing value in remark argument."); + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing value in remark " + "argument."); // We have at least a key and a value, create an entry. - auto &RArg = R.Args.emplace_back(); + R.Args.emplace_back(); if (Expected Key = (*StrTab)[*Arg.KeyIdx]) - RArg.Key = *Key; + R.Args.back().Key = *Key; else return Key.takeError(); if (Expected Value = (*StrTab)[*Arg.ValueIdx]) - RArg.Val = *Value; + R.Args.back().Val = *Value; else return Value.takeError(); - if (Arg.Loc) { + if (Arg.SourceFileNameIdx && Arg.SourceLine && Arg.SourceColumn) { if (Expected SourceFileName = - (*StrTab)[Arg.Loc->SourceFileNameIdx]) { - RArg.Loc.emplace(); - RArg.Loc->SourceFilePath = *SourceFileName; - RArg.Loc->SourceLine = Arg.Loc->SourceLine; - RArg.Loc->SourceColumn = Arg.Loc->SourceColumn; + (*StrTab)[*Arg.SourceFileNameIdx]) { + R.Args.back().Loc.emplace(); + R.Args.back().Loc->SourceFilePath = *SourceFileName; + R.Args.back().Loc->SourceLine = *Arg.SourceLine; + R.Args.back().Loc->SourceColumn = *Arg.SourceColumn; } else return SourceFileName.takeError(); } diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h index 257ac46eb9495..cba805dc24b59 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.h +++ b/llvm/lib/Remarks/BitstreamRemarkParser.h @@ -13,15 
+13,14 @@ #ifndef LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H #define LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/Remarks/BitstreamRemarkContainer.h" -#include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkParser.h" -#include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Error.h" -#include "llvm/Support/FormatVariadic.h" +#include #include #include #include @@ -29,156 +28,66 @@ namespace llvm { namespace remarks { -class BitstreamBlockParserHelperBase { -protected: - BitstreamCursor &Stream; - - StringRef BlockName; - unsigned BlockID; - -public: - BitstreamBlockParserHelperBase(BitstreamCursor &Stream, unsigned BlockID, - StringRef BlockName) - : Stream(Stream), BlockName(BlockName), BlockID(BlockID) {} - - template Error error(char const *Fmt, const Ts &...Vals) { - std::string Buffer; - raw_string_ostream OS(Buffer); - OS << "Error while parsing " << BlockName << " block: "; - OS << formatv(Fmt, Vals...); - return make_error( - std::move(Buffer), - std::make_error_code(std::errc::illegal_byte_sequence)); - } - - Error expectBlock(); - -protected: - Error enterBlock(); - - Error unknownRecord(unsigned AbbrevID); - Error unexpectedRecord(StringRef RecordName); - Error malformedRecord(StringRef RecordName); - Error unexpectedBlock(unsigned Code); -}; - -template -class BitstreamBlockParserHelper : public BitstreamBlockParserHelperBase { -protected: - using BitstreamBlockParserHelperBase::BitstreamBlockParserHelperBase; - Derived &derived() { return *static_cast(this); } - - /// Parse a record and fill in the fields in the parser. - /// The subclass can statically override this method. - Error parseRecord(unsigned Code) { return unexpectedRecord(Code); } - - /// Parse a subblock and fill in the fields in the parser. - /// The subclass can statically override this method. 
- Error parseSubBlock(unsigned Code) { return unexpectedBlock(Code); } - -public: - /// Enter, parse, and leave this bitstream block. This expects the - /// BitstreamCursor to be right after the SubBlock entry (i.e. after calling - /// expectBlock). - Error parseBlock() { - if (Error E = enterBlock()) - return E; - - // Stop when there is nothing to read anymore or when we encounter an - // END_BLOCK. - while (true) { - Expected Next = Stream.advance(); - if (!Next) - return Next.takeError(); - switch (Next->Kind) { - case BitstreamEntry::SubBlock: - if (Error E = derived().parseSubBlock(Next->ID)) - return E; - continue; - case BitstreamEntry::EndBlock: - return Error::success(); - case BitstreamEntry::Record: - if (Error E = derived().parseRecord(Next->ID)) - return E; - continue; - case BitstreamEntry::Error: - return error("Unexpected end of bitstream."); - } - llvm_unreachable("Unexpected BitstreamEntry"); - } - } -}; +struct Remark; /// Helper to parse a META_BLOCK for a bitstream remark container. -class BitstreamMetaParserHelper - : public BitstreamBlockParserHelper { - friend class BitstreamBlockParserHelper; - -public: - struct ContainerInfo { - uint64_t Version; - uint64_t Type; - }; - - /// The parsed content: depending on the container type, some fields might - /// be empty. - std::optional Container; - std::optional RemarkVersion; - std::optional ExternalFilePath; +struct BitstreamMetaParserHelper { + /// The Bitstream reader. + BitstreamCursor &Stream; + /// Reference to the storage for the block info. + BitstreamBlockInfo &BlockInfo; + /// The parsed content: depending on the container type, some fields might be + /// empty. + std::optional ContainerVersion; + std::optional ContainerType; std::optional StrTabBuf; + std::optional ExternalFilePath; + std::optional RemarkVersion; - BitstreamMetaParserHelper(BitstreamCursor &Stream) - : BitstreamBlockParserHelper(Stream, META_BLOCK_ID, MetaBlockName) {} + /// Continue parsing with \p Stream. 
\p Stream is expected to contain a + /// ENTER_SUBBLOCK to the META_BLOCK at the current position. + /// \p Stream is expected to have a BLOCKINFO_BLOCK set. + BitstreamMetaParserHelper(BitstreamCursor &Stream, + BitstreamBlockInfo &BlockInfo); -protected: - Error parseRecord(unsigned Code); + /// Parse the META_BLOCK and fill the available entries. + /// This helper does not check for the validity of the fields. + Error parse(); }; /// Helper to parse a REMARK_BLOCK for a bitstream remark container. -class BitstreamRemarkParserHelper - : public BitstreamBlockParserHelper { - friend class BitstreamBlockParserHelper; - -protected: - SmallVector Record; - StringRef RecordBlob; - unsigned RecordID; - -public: - struct RemarkLoc { - uint64_t SourceFileNameIdx; - uint64_t SourceLine; - uint64_t SourceColumn; - }; - - struct Argument { - std::optional KeyIdx; - std::optional ValueIdx; - std::optional Loc; - - Argument(std::optional KeyIdx, std::optional ValueIdx) - : KeyIdx(KeyIdx), ValueIdx(ValueIdx) {} - }; - +struct BitstreamRemarkParserHelper { + /// The Bitstream reader. + BitstreamCursor &Stream; /// The parsed content: depending on the remark, some fields might be empty. std::optional Type; std::optional RemarkNameIdx; std::optional PassNameIdx; std::optional FunctionNameIdx; + std::optional SourceFileNameIdx; + std::optional SourceLine; + std::optional SourceColumn; std::optional Hotness; - std::optional Loc; - - SmallVector Args; - - BitstreamRemarkParserHelper(BitstreamCursor &Stream) - : BitstreamBlockParserHelper(Stream, REMARK_BLOCK_ID, RemarkBlockName) {} - - /// Clear helper state and parse next remark block. - Error parseNext(); - -protected: - Error parseRecord(unsigned Code); - Error handleRecord(); + struct Argument { + std::optional KeyIdx; + std::optional ValueIdx; + std::optional SourceFileNameIdx; + std::optional SourceLine; + std::optional SourceColumn; + }; + std::optional> Args; + /// Avoid re-allocating a vector every time. 
+ SmallVector TmpArgs; + + /// Continue parsing with \p Stream. \p Stream is expected to contain a + /// ENTER_SUBBLOCK to the REMARK_BLOCK at the current position. + /// \p Stream is expected to have a BLOCKINFO_BLOCK set and to have already + /// parsed the META_BLOCK. + BitstreamRemarkParserHelper(BitstreamCursor &Stream); + + /// Parse the REMARK_BLOCK and fill the available entries. + /// This helper does not check for the validity of the fields. + Error parse(); }; /// Helper to parse any bitstream remark container. @@ -189,15 +98,21 @@ struct BitstreamParserHelper { BitstreamBlockInfo BlockInfo; /// Start parsing at \p Buffer. BitstreamParserHelper(StringRef Buffer); - /// Parse and validate the magic number. - Error expectMagic(); - /// Advance to the meta block - Error advanceToMetaBlock(); + /// Parse the magic number. + Expected> parseMagic(); /// Parse the block info block containing all the abbrevs. /// This needs to be called before calling any other parsing function. Error parseBlockInfoBlock(); + /// Return true if the next block is a META_BLOCK. This function does not move + /// the cursor. + Expected isMetaBlock(); + /// Return true if the next block is a REMARK_BLOCK. This function does not + /// move the cursor. + Expected isRemarkBlock(); /// Return true if the parser reached the end of the stream. bool atEndOfStream() { return Stream.AtEndOfStream(); } + /// Jump to the end of the stream, skipping everything. + void skipToEnd() { return Stream.skipToEnd(); } }; /// Parses and holds the state of the latest parsed remark. @@ -234,16 +149,14 @@ struct BitstreamRemarkParser : public RemarkParser { Expected> parseRemark(); private: + /// Helper functions. 
Error processCommonMeta(BitstreamMetaParserHelper &Helper); Error processStandaloneMeta(BitstreamMetaParserHelper &Helper); Error processSeparateRemarksFileMeta(BitstreamMetaParserHelper &Helper); Error processSeparateRemarksMetaMeta(BitstreamMetaParserHelper &Helper); - Error processExternalFilePath(BitstreamMetaParserHelper &Helper); - Error processStrTab(BitstreamMetaParserHelper &Helper); - Error processRemarkVersion(BitstreamMetaParserHelper &Helper); - Expected> processRemark(BitstreamRemarkParserHelper &Helper); + Error processExternalFilePath(std::optional ExternalFilePath); }; Expected> createBitstreamParserFromMeta( From af144d582ea16483670be3fc00df5131deaa0fd2 Mon Sep 17 00:00:00 2001 From: Orlando Cazalet-Hyams Date: Mon, 15 Sep 2025 15:38:09 +0100 Subject: [PATCH 348/734] [Dexter] llvm-lit: Always log DAP messages to stderr (#158586) This will help diagnose flaky buildbots. The stderr output is only shown by lit if the test fails. --- .../debuginfo-tests/dexter/dex/debugger/DAP.py | 9 ++++++++- .../debuginfo-tests/dexter/dex/debugger/Debuggers.py | 8 ++++++-- cross-project-tests/lit.cfg.py | 6 ++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py index 4e64f880487f5..a849990678d42 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py +++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DAP.py @@ -59,10 +59,17 @@ def _custom_enter(self): if self.log_file == "-": self.out_handle = sys.stdout return + if self.log_file == "-e": + self.out_handle = sys.stderr + return self.out_handle = open(self.log_file, "w+", encoding="utf-8") def _custom_exit(self): - if self.out_handle is not None and self.log_file != "-": + if ( + self.out_handle is not None + and self.log_file != "-" + and self.log_file != "-e" + ): self.out_handle.close() self.open = False diff --git 
a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py index ef23dcf6bebbc..0232bdeb64b57 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py +++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py @@ -74,7 +74,7 @@ def add_debugger_tool_base_arguments(parser, defaults): type=str, metavar="", default=None, - help="log file for messages between Dexter and the debug adapter; set to '-' to log to stdout", + help="log file for messages between Dexter and the debug adapter; set to '-' to log to stdout, '-e' to log to stderr", ) dap_group.add_argument( "--colorize-dap-log", @@ -186,7 +186,11 @@ def handle_debugger_tool_base_options(context, defaults): # noqa 'could not find "{}"'.format(options.lldb_executable) ) - if options.dap_message_log is not None and options.dap_message_log != "-": + if ( + options.dap_message_log is not None + and options.dap_message_log != "-" + and options.dap_message_log != "-e" + ): options.dap_message_log = os.path.abspath(options.dap_message_log) diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index f042c27aece9f..e702a7739f511 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -121,7 +121,7 @@ def configure_dexter_substitutions(): tools.append( ToolSubst( "%dexter_lldb_args", - f'--lldb-executable "{lldb_dap_path}" --debugger lldb-dap', + f'--lldb-executable "{lldb_dap_path}" --debugger lldb-dap --dap-message-log=-e', ) ) @@ -148,7 +148,9 @@ def configure_dexter_substitutions(): dexter_regression_test_c_builder = "clang" dexter_regression_test_cxx_builder = "clang++" dexter_regression_test_debugger = "lldb-dap" - dexter_regression_test_additional_flags = f'--lldb-executable "{lldb_dap_path}"' + dexter_regression_test_additional_flags = ( + f'--lldb-executable "{lldb_dap_path}" --dap-message-log=-e' + ) dexter_regression_test_c_flags = "-O0 
-glldb -std=gnu11" dexter_regression_test_cxx_flags = "-O0 -glldb -std=gnu++11" From 959c3b627fd4084ae583c80703b88b91f63f9a0e Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Mon, 15 Sep 2025 23:39:25 +0900 Subject: [PATCH 349/734] [clang][ARM] Include arm_acle.h in intrin.h on supported platforms (#144172) Right now when using ARM intrinsics without including `arm_acle.h`, we throw a warning saying to include it or provide a declaration for the function. MSVC doesn't have any ARM-intrinsic specific header, so include Clang's ARM intrinsic header (`arm_acle.h`) in `intrin.h` so we don't get a warning that tells the user to include a header that doesn't exist. Ideally we could change the header based on the platform, but we don't have the infra for that at the moment. See https://github.com/llvm/llvm-project/pull/140910 for more info. Signed-off-by: Sarnie, Nick --- clang/lib/Headers/intrin.h | 4 ++++ clang/test/Headers/arm-acle-no-direct-include.c | 8 ++++++++ 2 files changed, 12 insertions(+) create mode 100644 clang/test/Headers/arm-acle-no-direct-include.c diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 588c283cbdfba..210ed0c1f773b 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -30,6 +30,10 @@ #include #endif +#if defined(__ARM_ACLE) +#include +#endif + /* For the definition of jmp_buf. 
*/ #if __STDC_HOSTED__ #include diff --git a/clang/test/Headers/arm-acle-no-direct-include.c b/clang/test/Headers/arm-acle-no-direct-include.c new file mode 100644 index 0000000000000..b69549d92e4b0 --- /dev/null +++ b/clang/test/Headers/arm-acle-no-direct-include.c @@ -0,0 +1,8 @@ +// RUN: %clang_cl --target=aarch64-windows-msvc -Xclang -verify /E -U__STDC_HOSTED__ -Wno-builtin-macro-redefined %s 2>&1 | FileCheck %s + +// expected-no-diagnostics + +// CHECK: void __yield(void); +#include +void f() { __yield(); } + From 32ab6ff9f95739cba9954e666479d30e126af53c Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Mon, 15 Sep 2025 15:45:49 +0100 Subject: [PATCH 350/734] [NFC][Flang] Move bounds helper functions to Util header. (#154164) This PR moves the `needsBoundsOps` and `genBoundsOps` helper functions to `flang/include/flang/Optimizer/OpenMP/Utils.h`. --- flang/include/flang/Optimizer/OpenMP/Utils.h | 40 +++++++++++++++++++ flang/lib/Lower/OpenMP/OpenMP.cpp | 11 +++-- .../Optimizer/OpenMP/AutomapToTargetData.cpp | 35 ++-------------- .../OpenMP/MapsForPrivatizedSymbols.cpp | 38 ++---------------- 4 files changed, 51 insertions(+), 73 deletions(-) diff --git a/flang/include/flang/Optimizer/OpenMP/Utils.h b/flang/include/flang/Optimizer/OpenMP/Utils.h index 636c768b016b7..235e667130659 100644 --- a/flang/include/flang/Optimizer/OpenMP/Utils.h +++ b/flang/include/flang/Optimizer/OpenMP/Utils.h @@ -13,6 +13,17 @@ #ifndef FORTRAN_OPTIMIZER_OPENMP_UTILS_H #define FORTRAN_OPTIMIZER_OPENMP_UTILS_H +#include "flang/Optimizer/Builder/BoxValue.h" +#include "flang/Optimizer/Builder/DirectivesCommon.h" +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Dialect/FIRType.h" + +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Value.h" + +#include "llvm/ADT/SmallVector.h" + namespace flangomp { enum class DoConcurrentMappingKind { @@ -21,6 +32,35 @@ enum class DoConcurrentMappingKind { 
DCMK_Device ///< Lower to run in parallel on the GPU. }; +/// Return true if the variable has a dynamic size and therefore requires +/// bounds operations to describe its extents. +inline bool needsBoundsOps(mlir::Value var) { + assert(mlir::isa(var.getType()) && + "needsBoundsOps can deal only with pointer types"); + mlir::Type t = fir::unwrapRefType(var.getType()); + if (mlir::Type inner = fir::dyn_cast_ptrOrBoxEleTy(t)) + return fir::hasDynamicSize(inner); + return fir::hasDynamicSize(t); +} + +/// Generate MapBoundsOp operations for the variable and append them to +/// `boundsOps`. +inline llvm::SmallVector genBoundsOps(fir::FirOpBuilder &builder, + mlir::Value var, + bool isAssumedSize = false, + bool isOptional = false) { + mlir::Location loc = var.getLoc(); + fir::factory::AddrAndBoundsInfo info = + fir::factory::getDataOperandBaseAddr(builder, var, isOptional, loc); + fir::ExtendedValue exv = + hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr}, + /*contiguousHint=*/true) + .first; + return fir::factory::genImplicitBoundsOps( + builder, info, exv, isAssumedSize, loc); +} + } // namespace flangomp #endif // FORTRAN_OPTIMIZER_OPENMP_UTILS_H diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 0ec33e6b24dbf..9279bc04e7daf 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -30,6 +30,7 @@ #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/OpenMP/Utils.h" #include "flang/Parser/characters.h" #include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" @@ -2496,12 +2497,10 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, Fortran::lower::getDataOperandBaseAddr( converter, firOpBuilder, sym.GetUltimate(), converter.getCurrentLocation()); - llvm::SmallVector bounds = - fir::factory::genImplicitBoundsOps( - firOpBuilder, info, 
dataExv, - semantics::IsAssumedSizeArray(sym.GetUltimate()), - converter.getCurrentLocation()); + llvm::SmallVector bounds = flangomp::genBoundsOps( + firOpBuilder, info.rawInput, + semantics::IsAssumedSizeArray(sym.GetUltimate()), + semantics::IsOptional(sym.GetUltimate())); mlir::Value baseOp = info.rawInput; mlir::Type eleType = baseOp.getType(); if (auto refType = mlir::dyn_cast(baseOp.getType())) diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp index 8b9991301aae8..d4be315c167be 100644 --- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp +++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp @@ -13,6 +13,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/OpenMP/Utils.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" @@ -33,36 +34,6 @@ namespace { class AutomapToTargetDataPass : public flangomp::impl::AutomapToTargetDataPassBase< AutomapToTargetDataPass> { - - // Returns true if the variable has a dynamic size and therefore requires - // bounds operations to describe its extents. - inline bool needsBoundsOps(mlir::Value var) { - assert(mlir::isa(var.getType()) && - "only pointer like types expected"); - mlir::Type t = fir::unwrapRefType(var.getType()); - if (mlir::Type inner = fir::dyn_cast_ptrOrBoxEleTy(t)) - return fir::hasDynamicSize(inner); - return fir::hasDynamicSize(t); - } - - // Generate MapBoundsOp operations for the variable if required. 
- inline void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var, - llvm::SmallVectorImpl &boundsOps) { - mlir::Location loc = var.getLoc(); - fir::factory::AddrAndBoundsInfo info = - fir::factory::getDataOperandBaseAddr(builder, var, - /*isOptional=*/false, loc); - fir::ExtendedValue exv = - hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr}, - /*contiguousHint=*/true) - .first; - llvm::SmallVector tmp = - fir::factory::genImplicitBoundsOps( - builder, info, exv, /*dataExvIsAssumedSize=*/false, loc); - llvm::append_range(boundsOps, tmp); - } - void findRelatedAllocmemFreemem(fir::AddrOfOp addressOfOp, llvm::DenseSet &allocmems, llvm::DenseSet &freemems) { @@ -112,8 +83,8 @@ class AutomapToTargetDataPass auto addMapInfo = [&](auto globalOp, auto memOp) { builder.setInsertionPointAfter(memOp); SmallVector bounds; - if (needsBoundsOps(memOp.getMemref())) - genBoundsOps(builder, memOp.getMemref(), bounds); + if (flangomp::needsBoundsOps(memOp.getMemref())) + bounds = flangomp::genBoundsOps(builder, memOp.getMemref()); omp::TargetEnterExitUpdateDataOperands clauses; mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create( diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 30328573b74fc..5c1a3d232a8c9 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -29,6 +29,7 @@ #include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/OpenMP/Passes.h" +#include "flang/Optimizer/OpenMP/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" @@ -106,8 +107,8 @@ class MapsForPrivatizedSymbolsPass // Figure out the bounds because knowing the bounds will help the subsequent // MapInfoFinalizationPass map the underlying data of the descriptor. 
llvm::SmallVector boundsOps; - if (needsBoundsOps(varPtr)) - genBoundsOps(builder, varPtr, boundsOps); + if (flangomp::needsBoundsOps(varPtr)) + boundsOps = flangomp::genBoundsOps(builder, varPtr); mlir::omp::VariableCaptureKind captureKind = mlir::omp::VariableCaptureKind::ByRef; @@ -194,38 +195,5 @@ class MapsForPrivatizedSymbolsPass } } } - // As the name suggests, this function examines var to determine if - // it has dynamic size. If true, this pass'll have to extract these - // bounds from descriptor of var and add the bounds to the resultant - // MapInfoOp. - bool needsBoundsOps(mlir::Value var) { - assert(mlir::isa(var.getType()) && - "needsBoundsOps can deal only with pointer types"); - mlir::Type t = fir::unwrapRefType(var.getType()); - // t could be a box, so look inside the box - auto innerType = fir::dyn_cast_ptrOrBoxEleTy(t); - if (innerType) - return fir::hasDynamicSize(innerType); - return fir::hasDynamicSize(t); - } - - void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var, - llvm::SmallVector &boundsOps) { - mlir::Location loc = var.getLoc(); - fir::factory::AddrAndBoundsInfo info = - fir::factory::getDataOperandBaseAddr(builder, var, - /*isOptional=*/false, loc); - fir::ExtendedValue extendedValue = - hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr}, - /*continguousHint=*/true) - .first; - llvm::SmallVector boundsOpsVec = - fir::factory::genImplicitBoundsOps( - builder, info, extendedValue, - /*dataExvIsAssumedSize=*/false, loc); - for (auto bounds : boundsOpsVec) - boundsOps.push_back(bounds); - } }; } // namespace From 0cca9e4baa0073a5a6f46b1b09dbd6ed290ae619 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 15 Sep 2025 07:55:37 -0700 Subject: [PATCH 351/734] [compiler-rt] Remove enable_execute_stack support on arm64 Darwin (#158386) `enable_execute_stack` is not supported on arm64 Darwin because: - It calls mprotect with `PROT_WRITE | PROT_EXEC`, which is rejected on this platform. 
- It assumes a fixed 4K page size, which is not guaranteed. This change disables building `enable_execute_stack` on arm64 Darwin and fixes the failing test: `compiler-rt/test/builtins/Unit/enable_execute_stack_test.c`. rdar://159705691 --- compiler-rt/lib/builtins/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 1dadb6a810efb..0d7fc65cfd3e9 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -674,6 +674,11 @@ if (MINGW) ) endif() +# Don't build enable_execute_stack on arm64 darwin. +if (APPLE) + list(REMOVE_ITEM aarch64_SOURCES enable_execute_stack.c) +endif() + set(amdgcn_SOURCES ${GENERIC_SOURCES}) set(armv4t_SOURCES ${arm_min_SOURCES}) From 99ec5b95da4fa4e32cf9854513413510d48781f6 Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Mon, 15 Sep 2025 15:57:33 +0100 Subject: [PATCH 352/734] [flang][driver] Support -gdwarf-N option. (#158314) This PR adds support for the -gdwarf-N option, which allows the user to choose the version of DWARF. Currently N can be 2, 3, 4, or 5. This is only the driver part of the change. Later PRs will propagate it to the IR. Fixes https://github.com/llvm/llvm-project/issues/112910. --- clang/include/clang/Driver/Options.td | 9 ++++--- clang/lib/Driver/ToolChains/Flang.cpp | 7 +++++- .../include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 4 ++++ flang/test/Driver/flang-dwarf-version.f90 | 24 +++++++++++++++++++ 5 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 flang/test/Driver/flang-dwarf-version.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 47d328f862e07..def7c09d58cfb 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4682,8 +4682,10 @@ def gdbx : Flag<["-"], "gdbx">, Group; // Equivalent to our default dwarf version.
Forces usual dwarf emission when // CodeView is enabled. def gdwarf : Flag<["-"], "gdwarf">, Group, - Visibility<[ClangOption, CLOption, DXCOption]>, + Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, HelpText<"Generate source-level debug information with the default dwarf version">; + +let Visibility = [ClangOption, FlangOption] in { def gdwarf_2 : Flag<["-"], "gdwarf-2">, Group, HelpText<"Generate source-level debug information with dwarf version 2">; def gdwarf_3 : Flag<["-"], "gdwarf-3">, Group, @@ -4692,6 +4694,7 @@ def gdwarf_4 : Flag<["-"], "gdwarf-4">, Group, HelpText<"Generate source-level debug information with dwarf version 4">; def gdwarf_5 : Flag<["-"], "gdwarf-5">, Group, HelpText<"Generate source-level debug information with dwarf version 5">; +} def gdwarf64 : Flag<["-"], "gdwarf64">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption]>, HelpText<"Enables DWARF64 format for ELF binaries, if debug information emission is enabled.">, @@ -7633,6 +7636,8 @@ def debug_info_kind_EQ : Joined<["-"], "debug-info-kind=">; def record_command_line : Separate<["-"], "record-command-line">, HelpText<"The string to embed in the .LLVM.command.line section.">, MarshallingInfoString>; +def dwarf_version_EQ : Joined<["-"], "dwarf-version=">, + MarshallingInfoInt>; } // let Visibility = [CC1Option, CC1AsOption, FC1Option] @@ -7644,8 +7649,6 @@ def debug_info_macro : Flag<["-"], "debug-info-macro">, def default_function_attr : Separate<["-"], "default-function-attr">, HelpText<"Apply given attribute to all functions">, MarshallingInfoStringVector>; -def dwarf_version_EQ : Joined<["-"], "dwarf-version=">, - MarshallingInfoInt>; def debugger_tuning_EQ : Joined<["-"], "debugger-tuning=">, Values<"gdb,lldb,sce,dbx">, NormalizedValuesScope<"llvm::DebuggerKind">, NormalizedValues<["GDB", "LLDB", "SCE", "DBX"]>, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index d3f4af164f672..12e510ab1562d 100644 --- 
a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -134,12 +134,17 @@ void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const { if (Args.hasArg(options::OPT_gN_Group)) { Arg *gNArg = Args.getLastArg(options::OPT_gN_Group); DebugInfoKind = debugLevelToInfoKind(*gNArg); - } else if (Args.hasArg(options::OPT_g_Flag)) { + } else if (Args.hasArg(options::OPT_g_Group)) { DebugInfoKind = llvm::codegenoptions::FullDebugInfo; } else { DebugInfoKind = llvm::codegenoptions::NoDebugInfo; } addDebugInfoKind(CmdArgs, DebugInfoKind); + if (getDwarfNArg(Args)) { + const unsigned DwarfVersion = getDwarfVersion(getToolChain(), Args); + CmdArgs.push_back( + Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion))); + } } void Flang::addCodegenOptions(const ArgList &Args, diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index edab48a70d29d..f09159962883f 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -47,6 +47,7 @@ CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fusion. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass +CODEGENOPT(DwarfVersion, 3, 0) ///< Dwarf version CODEGENOPT(Underscoring, 1, 1) ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use. 
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 4f42fbd66eac0..a00e568bb4a54 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -157,6 +157,10 @@ static bool parseDebugArgs(Fortran::frontend::CodeGenOptions &opts, clang::DiagnosticsEngine::Warning, "Unsupported debug option: %0"); diags.Report(debugWarning) << arg->getValue(); } + // The default value of 2 here is to match clang. + opts.DwarfVersion = + getLastArgIntValue(args, clang::driver::options::OPT_dwarf_version_EQ, + /*Default=*/2, diags); } return true; } diff --git a/flang/test/Driver/flang-dwarf-version.f90 b/flang/test/Driver/flang-dwarf-version.f90 new file mode 100644 index 0000000000000..dc69140a7eda1 --- /dev/null +++ b/flang/test/Driver/flang-dwarf-version.f90 @@ -0,0 +1,24 @@ +// RUN: %flang -### -S %s -g -gdwarf-5 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DWARF5 %s +// RUN: %flang -### -S %s -gdwarf-5 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DWARF5 %s +// RUN: %flang -### -S %s -g1 -gdwarf-5 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WITH-G1-DWARF5 %s +// RUN: %flang -### -S %s -gdwarf-4 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DWARF4 %s +// RUN: %flang -### -S %s -gdwarf-3 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DWARF3 %s +// RUN: %flang -### -S %s -gdwarf-2 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DWARF2 %s + +// CHECK-DWARF5: -debug-info-kind=standalone +// CHECK-DWARF5-SAME: -dwarf-version=5 + +// CHECK-WITH-G1-DWARF5: -debug-info-kind=line-tables-only +// CHECK-WITH-G1-DWARF5-SAME: -dwarf-version=5 + +// CHECK-DWARF4: -dwarf-version=4 + +// CHECK-DWARF3: -dwarf-version=3 + +// CHECK-DWARF2: -dwarf-version=2 From e299d9ac744600d837b45cb0be9d000b4ecbf2a3 Mon Sep 17 00:00:00 2001 From: sstipano Date: Mon, 15 Sep 2025 16:59:42 +0200 Subject: [PATCH 353/734] [AMDGPU][NFC] Refactor FLAT_Global_* pseudos. 
(#120244) --- llvm/lib/Target/AMDGPU/FLATInstructions.td | 155 +++++++++------------ 1 file changed, 68 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index a1306565bbe29..7e5ae25ff30e6 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -228,15 +228,14 @@ class GlobalSaddrTable { // saddr is 32-bit (which isn't handled here yet). class FLAT_Load_Pseudo< string opName, RegisterOperand vdata_op, bit HasTiedOutput = 0, - bit HasSaddr = 0, bit EnableSaddr = 0> + bit HasSaddr = 0, bit EnableSaddr = 0, + RegisterClass VaddrRC = !if(EnableSaddr, VGPR_32, VReg_64)> : FLAT_Pseudo { let OutOperandList = (outs vdata_op:$vdst); let InOperandList = !con( - !if(EnableSaddr, - (ins SReg_64_XEXEC_XNULL:$saddr, VGPR_32:$vaddr), - (ins VReg_64:$vaddr)), - (ins flat_offset:$offset), + !if(EnableSaddr, (ins SReg_64_XEXEC_XNULL:$saddr), (ins)), + (ins VaddrRC:$vaddr, flat_offset:$offset), // FIXME: Operands with default values do not work with following // non-optional operands. 
!if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in), @@ -268,15 +267,13 @@ multiclass FLAT_Flat_Load_Pseudo_t16 { } class FLAT_Store_Pseudo : FLAT_Pseudo< - opName, - (outs), - !con( - !if(EnableSaddr, - (ins VGPR_32:$vaddr, vdataClass:$vdata, SReg_64_XEXEC_XNULL:$saddr), - (ins VReg_64:$vaddr, vdataClass:$vdata)), - (ins flat_offset:$offset, CPol_0:$cpol)), - " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { + bit HasSaddr = 0, bit EnableSaddr = 0, + RegisterClass VaddrRC = !if(EnableSaddr, VGPR_32, VReg_64)> : FLAT_Pseudo { + let InOperandList = !con( + (ins VaddrRC:$vaddr, vdataClass:$vdata), + !if(EnableSaddr, (ins SReg_64_XEXEC_XNULL:$saddr), (ins)), + (ins flat_offset:$offset, CPol_0:$cpol)); + let AsmOperands = " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"; let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -833,99 +830,83 @@ multiclass FLAT_Atomic_Pseudo< defm "" : FLAT_Atomic_Pseudo_RTN; } -multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< +class FLAT_Global_Atomic_Pseudo_NO_RTN< string opName, RegisterOperand vdst_op, ValueType vt, ValueType data_vt = vt, - RegisterOperand data_op = vdst_op> { - - let is_flat_global = 1 in { - def "" : FLAT_AtomicNoRet_Pseudo , - GlobalSaddrTable<0, opName> { - let has_saddr = 1; - let FPAtomic = data_vt.isFP; - } - - def _SADDR : FLAT_AtomicNoRet_Pseudo , - GlobalSaddrTable<1, opName> { - let has_saddr = 1; - let enabled_saddr = 1; - let FPAtomic = data_vt.isFP; - } - } + RegisterOperand data_op = vdst_op, + bit EnableSaddr = false, + RegisterClass VaddrRC = !if(EnableSaddr, VGPR_32, VReg_64)> + : FLAT_AtomicNoRet_Pseudo, GlobalSaddrTable { + let InOperandList = !con( + (ins VaddrRC:$vaddr, data_op:$vdata), + !if(EnableSaddr, (ins SReg_64_XEXEC_XNULL:$saddr), (ins)), + (ins flat_offset:$offset, CPol_0:$cpol)); + let AsmOperands = " $vaddr, $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"; + let has_saddr = 1; + let 
enabled_saddr = EnableSaddr; + let FPAtomic = data_vt.isFP; + let is_flat_global = 1; } -multiclass FLAT_Global_Atomic_Pseudo_RTN< - string opName, +multiclass FLAT_Global_Atomic_Pseudo_Helper_NO_RTN { + def "" : FLAT_Global_Atomic_Pseudo_NO_RTN; + def _SADDR : FLAT_Global_Atomic_Pseudo_NO_RTN; +} - defvar vdst_op_vgpr = getEquivalentVGPROperand.ret; - defvar data_op_vgpr = getEquivalentVGPROperand.ret; - - let is_flat_global = 1 in { - def _RTN : FLAT_AtomicRet_Pseudo , - GlobalSaddrTable<0, opName#"_rtn"> { - let has_saddr = 1; - let FPAtomic = data_vt.isFP; - } +class FLAT_Global_Atomic_Pseudo_RTN< + string opName, + RegisterOperand vdst_op, + ValueType vt, + ValueType data_vt = vt, + RegisterOperand data_op = vdst_op, + bit EnableSaddr = false, + bit IsVGPR = false, + RegisterClass VaddrRC = !if(EnableSaddr, VGPR_32, VReg_64)> + : FLAT_AtomicRet_Pseudo, GlobalSaddrTable { - def _SADDR_RTN : FLAT_AtomicRet_Pseudo , - GlobalSaddrTable<1, opName#"_rtn"> { - let has_saddr = 1; - let enabled_saddr = 1; - let FPAtomic = data_vt.isFP; - } + defvar vdst_rc= !if(IsVGPR, getEquivalentVGPROperand.ret, getEquivalentAGPROperand.ret); + defvar data_rc = !if(IsVGPR, getEquivalentVGPROperand.ret, getEquivalentAGPROperand.ret); - defvar vdst_op_agpr = getEquivalentAGPROperand.ret; - defvar data_op_agpr = getEquivalentAGPROperand.ret; + let OutOperandList = (outs vdst_rc:$vdst); + let InOperandList = !con( + (ins VaddrRC:$vaddr, data_rc:$vdata), + !if(EnableSaddr, (ins SReg_64_XEXEC_XNULL:$saddr), (ins)), + (ins flat_offset:$offset, CPol_GLC1:$cpol)); + let AsmOperands = " $vdst, $vaddr, $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"; + let has_saddr = 1; + let enabled_saddr = EnableSaddr; + let FPAtomic = data_vt.isFP; + let is_flat_global = 1; +} +multiclass FLAT_Global_Atomic_Pseudo_Helper_RTN { + def _RTN : FLAT_Global_Atomic_Pseudo_RTN; + def _SADDR_RTN : FLAT_Global_Atomic_Pseudo_RTN; let SubtargetPredicate = isGFX90APlus in { - def _RTN_agpr : 
FLAT_AtomicRet_Pseudo , - GlobalSaddrTable<0, opName#"_rtn_agpr"> { - let has_saddr = 1; - let FPAtomic = data_vt.isFP; - } - - def _SADDR_RTN_agpr : FLAT_AtomicRet_Pseudo , - GlobalSaddrTable<1, opName#"_rtn_agpr"> { - let has_saddr = 1; - let enabled_saddr = 1; - let FPAtomic = data_vt.isFP; - } - } + def _RTN_agpr : FLAT_Global_Atomic_Pseudo_RTN; + def _SADDR_RTN_agpr : FLAT_Global_Atomic_Pseudo_RTN; } } + multiclass FLAT_Global_Atomic_Pseudo< string opName, RegisterOperand vdst_rc, ValueType vt, ValueType data_vt = vt, RegisterOperand data_rc = vdst_rc> { - defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN; - defm "" : FLAT_Global_Atomic_Pseudo_RTN; + defm "" : FLAT_Global_Atomic_Pseudo_Helper_NO_RTN; + defm "" : FLAT_Global_Atomic_Pseudo_Helper_RTN; } //===----------------------------------------------------------------------===// @@ -1356,19 +1337,19 @@ let SubtargetPredicate = isGFX10Plus in { } // End SubtargetPredicate = isGFX10Plus let SubtargetPredicate = HasAtomicFaddNoRtnInsts in - defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < + defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_Helper_NO_RTN < "global_atomic_add_f32", AVLdSt_32, f32 >; let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in - defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < + defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_Helper_NO_RTN < "global_atomic_pk_add_f16", AVLdSt_32, v2f16 >; let SubtargetPredicate = HasAtomicFaddRtnInsts in - defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < + defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_Helper_RTN < "global_atomic_add_f32", AVLdSt_32, f32 >; let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in - defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < + defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_Helper_RTN < "global_atomic_pk_add_f16", AVLdSt_32, v2f16 >; From c88f3c582dc2ef5f2fdfd0c5887f5f7562f49095 Mon Sep 17 00:00:00 2001 From: 
Jakub Kuderski Date: Mon, 15 Sep 2025 10:59:53 -0400 Subject: [PATCH 354/734] [mlir] Add base class type aliases for rewrites/conversions. NFC. (#158433) This is to simplify writing rewrite/conversion patterns that usually start with: ```c++ struct MyPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; ``` and allow for: ```c++ struct MyPattern : public OpRewritePattern { using Base::Base; ``` similar to how we enable it for pass classes. --- mlir/include/mlir/IR/PatternMatch.h | 10 ++++++++++ mlir/include/mlir/Transforms/DialectConversion.h | 16 ++++++++++++++++ mlir/test/lib/Dialect/Test/TestPatterns.cpp | 11 ++++++++--- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 7b0b9cef9c5bd..576481a6e7215 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -312,6 +312,9 @@ struct OpOrInterfaceRewritePatternBase : public RewritePattern { template struct OpRewritePattern : public mlir::detail::OpOrInterfaceRewritePatternBase { + /// Type alias to allow derived classes to inherit constructors with + /// `using Base::Base;`. + using Base = OpRewritePattern; /// Patterns must specify the root operation name they match against, and can /// also specify the benefit of the pattern matching and a list of generated @@ -328,6 +331,9 @@ struct OpRewritePattern template struct OpInterfaceRewritePattern : public mlir::detail::OpOrInterfaceRewritePatternBase { + /// Type alias to allow derived classes to inherit constructors with + /// `using Base::Base;`. + using Base = OpInterfaceRewritePattern; OpInterfaceRewritePattern(MLIRContext *context, PatternBenefit benefit = 1) : mlir::detail::OpOrInterfaceRewritePatternBase( @@ -341,6 +347,10 @@ struct OpInterfaceRewritePattern template